aboutsummaryrefslogtreecommitdiffstats
path: root/samples
diff options
context:
space:
mode:
Diffstat (limited to 'samples')
-rw-r--r--samples/Kconfig100
-rw-r--r--samples/Makefile16
-rw-r--r--samples/acrn/Makefile12
-rw-r--r--samples/acrn/guest.ld9
-rw-r--r--samples/acrn/payload.ld9
-rw-r--r--samples/acrn/vm-sample.c136
-rw-r--r--samples/auxdisplay/.gitignore3
-rw-r--r--samples/auxdisplay/Makefile10
-rw-r--r--samples/auxdisplay/cfag12864b-example.c2
-rw-r--r--samples/binderfs/.gitignore2
-rw-r--r--samples/binderfs/Makefile4
-rw-r--r--samples/binderfs/binderfs_example.c1
-rw-r--r--samples/bpf/.gitignore12
-rw-r--r--samples/bpf/Makefile249
-rw-r--r--samples/bpf/README.rst22
-rw-r--r--samples/bpf/bpf_insn.h30
-rw-r--r--samples/bpf/bpf_load.c687
-rw-r--r--samples/bpf/bpf_load.h58
-rw-r--r--samples/bpf/cookie_uid_helper_example.c35
-rw-r--r--samples/bpf/cpustat_kern.c36
-rw-r--r--samples/bpf/cpustat_user.c48
-rwxr-xr-xsamples/bpf/do_hbm_test.sh34
-rw-r--r--samples/bpf/fds_example.c35
-rw-r--r--samples/bpf/hbm.c125
-rw-r--r--samples/bpf/hbm_kern.h4
-rw-r--r--samples/bpf/ibumad_kern.c26
-rw-r--r--samples/bpf/ibumad_user.c74
-rw-r--r--samples/bpf/lathist_kern.c24
-rw-r--r--samples/bpf/lathist_user.c42
-rwxr-xr-x[-rw-r--r--]samples/bpf/lwt_len_hist.sh2
-rw-r--r--samples/bpf/lwt_len_hist_kern.c7
-rw-r--r--samples/bpf/lwt_len_hist_user.c2
-rw-r--r--samples/bpf/map_perf_test_kern.c232
-rw-r--r--samples/bpf/map_perf_test_user.c168
-rw-r--r--samples/bpf/offwaketime_kern.c64
-rw-r--r--samples/bpf/offwaketime_user.c65
-rw-r--r--samples/bpf/sampleip_kern.c13
-rw-r--r--samples/bpf/sampleip_user.c95
-rw-r--r--samples/bpf/sock_example.c17
-rw-r--r--samples/bpf/sockex1_user.c15
-rw-r--r--samples/bpf/sockex2_kern.c4
-rw-r--r--samples/bpf/sockex2_user.c17
-rw-r--r--samples/bpf/sockex3_kern.c52
-rw-r--r--samples/bpf/sockex3_user.c67
-rw-r--r--samples/bpf/spintest_kern.c36
-rw-r--r--samples/bpf/spintest_user.c67
-rw-r--r--samples/bpf/syscall_tp_kern.c24
-rw-r--r--samples/bpf/syscall_tp_user.c60
-rw-r--r--samples/bpf/task_fd_query_kern.c2
-rw-r--r--samples/bpf/task_fd_query_user.c108
-rw-r--r--samples/bpf/test_cgrp2_array_pin.c4
-rw-r--r--samples/bpf/test_cgrp2_attach.c23
-rw-r--r--samples/bpf/test_cgrp2_sock.c8
-rw-r--r--samples/bpf/test_cgrp2_sock2.c61
-rwxr-xr-xsamples/bpf/test_cgrp2_sock2.sh21
-rw-r--r--samples/bpf/test_current_task_under_cgroup_kern.c27
-rw-r--r--samples/bpf/test_current_task_under_cgroup_user.c52
-rwxr-xr-xsamples/bpf/test_ipip.sh179
-rw-r--r--samples/bpf/test_lru_dist.c17
-rwxr-xr-x[-rw-r--r--]samples/bpf/test_lwt_bpf.sh0
-rw-r--r--samples/bpf/test_map_in_map_kern.c93
-rw-r--r--samples/bpf/test_map_in_map_user.c56
-rw-r--r--samples/bpf/test_overhead_kprobe_kern.c17
-rw-r--r--samples/bpf/test_overhead_tp_kern.c5
-rw-r--r--samples/bpf/test_overhead_user.c85
-rwxr-xr-xsamples/bpf/test_override_return.sh1
-rw-r--r--samples/bpf/test_probe_write_user_kern.c21
-rw-r--r--samples/bpf/test_probe_write_user_user.c49
-rw-r--r--samples/bpf/trace_common.h13
-rw-r--r--samples/bpf/trace_event_kern.c25
-rw-r--r--samples/bpf/trace_event_user.c138
-rw-r--r--samples/bpf/trace_output_kern.c15
-rw-r--r--samples/bpf/trace_output_user.c57
-rw-r--r--samples/bpf/tracex1_kern.c13
-rw-r--r--samples/bpf/tracex1_user.c38
-rw-r--r--samples/bpf/tracex2_kern.c27
-rw-r--r--samples/bpf/tracex2_user.c50
-rw-r--r--samples/bpf/tracex3_kern.c26
-rw-r--r--samples/bpf/tracex3_user.c58
-rw-r--r--samples/bpf/tracex4_kern.c12
-rw-r--r--samples/bpf/tracex4_user.c52
-rw-r--r--samples/bpf/tracex5_kern.c18
-rw-r--r--samples/bpf/tracex5_user.c68
-rw-r--r--samples/bpf/tracex6_kern.c38
-rw-r--r--samples/bpf/tracex6_user.c49
-rw-r--r--samples/bpf/tracex7_user.c44
-rw-r--r--samples/bpf/xdp1_kern.c13
-rw-r--r--samples/bpf/xdp1_user.c35
-rw-r--r--samples/bpf/xdp2_kern.c13
-rw-r--r--samples/bpf/xdp2skb_meta_kern.c2
-rw-r--r--samples/bpf/xdp_adjust_tail_user.c32
-rw-r--r--samples/bpf/xdp_fwd_user.c86
-rw-r--r--samples/bpf/xdp_monitor.bpf.c8
-rw-r--r--samples/bpf/xdp_monitor_kern.c257
-rw-r--r--samples/bpf/xdp_monitor_user.c737
-rw-r--r--samples/bpf/xdp_redirect.bpf.c49
-rw-r--r--samples/bpf/xdp_redirect_cpu.bpf.c (renamed from samples/bpf/xdp_redirect_cpu_kern.c)388
-rw-r--r--samples/bpf/xdp_redirect_cpu_user.c1063
-rw-r--r--samples/bpf/xdp_redirect_kern.c90
-rw-r--r--samples/bpf/xdp_redirect_map.bpf.c97
-rw-r--r--samples/bpf/xdp_redirect_map_kern.c92
-rw-r--r--samples/bpf/xdp_redirect_map_multi.bpf.c77
-rw-r--r--samples/bpf/xdp_redirect_map_multi_user.c232
-rw-r--r--samples/bpf/xdp_redirect_map_user.c324
-rw-r--r--samples/bpf/xdp_redirect_user.c277
-rw-r--r--samples/bpf/xdp_router_ipv4.bpf.c189
-rw-r--r--samples/bpf/xdp_router_ipv4_kern.c186
-rw-r--r--samples/bpf/xdp_router_ipv4_user.c446
-rw-r--r--samples/bpf/xdp_rxq_info_user.c74
-rw-r--r--samples/bpf/xdp_sample.bpf.c266
-rw-r--r--samples/bpf/xdp_sample.bpf.h141
-rw-r--r--samples/bpf/xdp_sample_pkts_kern.c14
-rw-r--r--samples/bpf/xdp_sample_pkts_user.c43
-rw-r--r--samples/bpf/xdp_sample_shared.h17
-rw-r--r--samples/bpf/xdp_sample_user.c1673
-rw-r--r--samples/bpf/xdp_sample_user.h110
-rw-r--r--samples/bpf/xdp_tx_iptunnel_kern.c2
-rw-r--r--samples/bpf/xdp_tx_iptunnel_user.c34
-rw-r--r--samples/bpf/xdpsock.h11
-rw-r--r--samples/bpf/xdpsock_kern.c24
-rw-r--r--samples/bpf/xdpsock_user.c1212
-rw-r--r--samples/configfs/configfs_sample.c61
-rw-r--r--samples/connector/.gitignore3
-rw-r--r--samples/connector/Makefile11
-rw-r--r--samples/coresight/Makefile4
-rw-r--r--samples/coresight/coresight-cfg-sample.c73
-rw-r--r--samples/fanotify/.gitignore1
-rw-r--r--samples/fanotify/Makefile5
-rw-r--r--samples/fanotify/fs-monitor.c142
-rw-r--r--samples/fprobe/Makefile3
-rw-r--r--samples/fprobe/fprobe_example.c148
-rw-r--r--samples/ftrace/Makefile2
-rw-r--r--samples/ftrace/ftrace-direct-modify.c62
-rw-r--r--samples/ftrace/ftrace-direct-multi-modify.c159
-rw-r--r--samples/ftrace/ftrace-direct-multi.c87
-rw-r--r--samples/ftrace/ftrace-direct-too.c39
-rw-r--r--samples/ftrace/ftrace-direct.c38
-rw-r--r--samples/ftrace/sample-trace-array.c24
-rw-r--r--samples/hidraw/.gitignore3
-rw-r--r--samples/hidraw/Makefile8
-rw-r--r--samples/hidraw/hid-example.c2
-rw-r--r--samples/hw_breakpoint/data_breakpoint.c11
-rw-r--r--samples/kdb/kdb_hello.c20
-rw-r--r--samples/kfifo/bytestream-example.c20
-rw-r--r--samples/kfifo/inttype-example.c20
-rw-r--r--samples/kfifo/record-example.c20
-rw-r--r--samples/kmemleak/Makefile3
-rw-r--r--samples/kmemleak/kmemleak-test.c99
-rw-r--r--samples/kobject/kobject-example.c4
-rw-r--r--samples/kobject/kset-example.c4
-rw-r--r--samples/kprobes/kprobe_example.c68
-rw-r--r--samples/kprobes/kretprobe_example.c13
-rw-r--r--samples/landlock/.gitignore1
-rw-r--r--samples/landlock/Makefile13
-rw-r--r--samples/landlock/sandboxer.c290
-rw-r--r--samples/livepatch/livepatch-shadow-fix1.c2
-rw-r--r--samples/livepatch/livepatch-shadow-fix2.c2
-rw-r--r--samples/mei/.gitignore3
-rw-r--r--samples/mei/Makefile9
-rw-r--r--samples/mei/mei-amt-version.c53
-rw-r--r--samples/mic/mpssd/.gitignore1
-rw-r--r--samples/mic/mpssd/Makefile28
-rwxr-xr-xsamples/mic/mpssd/micctrl162
-rwxr-xr-xsamples/mic/mpssd/mpss189
-rw-r--r--samples/mic/mpssd/mpssd.c1815
-rw-r--r--samples/mic/mpssd/mpssd.h89
-rw-r--r--samples/mic/mpssd/sysfs.c91
-rw-r--r--samples/nitro_enclaves/.gitignore2
-rw-r--r--samples/nitro_enclaves/Makefile16
-rw-r--r--samples/nitro_enclaves/ne_ioctl_sample.c882
-rw-r--r--samples/pidfd/.gitignore3
-rw-r--r--samples/pidfd/Makefile6
-rw-r--r--samples/pktgen/README.rst20
-rw-r--r--samples/pktgen/functions.sh9
-rw-r--r--samples/pktgen/parameters.sh20
-rwxr-xr-xsamples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh22
-rwxr-xr-xsamples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh22
-rwxr-xr-xsamples/pktgen/pktgen_sample01_simple.sh34
-rwxr-xr-xsamples/pktgen/pktgen_sample02_multiqueue.sh38
-rwxr-xr-xsamples/pktgen/pktgen_sample03_burst_single_flow.sh23
-rwxr-xr-xsamples/pktgen/pktgen_sample04_many_flows.sh31
-rwxr-xr-xsamples/pktgen/pktgen_sample05_flow_per_thread.sh31
-rwxr-xr-xsamples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh40
-rw-r--r--samples/qmi/qmi_sample_client.c14
-rw-r--r--samples/rust/Kconfig30
-rw-r--r--samples/rust/Makefile5
-rw-r--r--samples/rust/hostprogs/.gitignore3
-rw-r--r--samples/rust/hostprogs/Makefile5
-rw-r--r--samples/rust/hostprogs/a.rs7
-rw-r--r--samples/rust/hostprogs/b.rs5
-rw-r--r--samples/rust/hostprogs/single.rs12
-rw-r--r--samples/rust/rust_minimal.rs38
-rw-r--r--samples/seccomp/.gitignore9
-rw-r--r--samples/seccomp/Makefile42
-rw-r--r--samples/seccomp/bpf-helper.h8
-rw-r--r--samples/seccomp/dropper.c9
-rw-r--r--samples/timers/.gitignore3
-rw-r--r--samples/timers/Makefile16
-rw-r--r--samples/trace_events/Makefile2
-rw-r--r--samples/trace_events/trace-events-sample.c17
-rw-r--r--samples/trace_events/trace-events-sample.h94
-rw-r--r--samples/trace_events/trace_custom_sched.c60
-rw-r--r--samples/trace_events/trace_custom_sched.h96
-rw-r--r--samples/uhid/.gitignore2
-rw-r--r--samples/uhid/Makefile8
-rw-r--r--samples/uhid/uhid-example.c4
-rw-r--r--samples/user_events/Makefile5
-rw-r--r--samples/user_events/example.c102
-rw-r--r--samples/v4l/v4l2-pci-skeleton.c18
-rw-r--r--samples/vfio-mdev/mbochs.c345
-rw-r--r--samples/vfio-mdev/mdpy-defs.h2
-rw-r--r--samples/vfio-mdev/mdpy-fb.c13
-rw-r--r--samples/vfio-mdev/mdpy.c343
-rw-r--r--samples/vfio-mdev/mtty.c397
-rw-r--r--samples/vfs/.gitignore5
-rw-r--r--samples/vfs/Makefile10
-rw-r--r--samples/vfs/test-statx.c4
-rw-r--r--samples/watch_queue/.gitignore2
-rw-r--r--samples/watch_queue/Makefile4
-rw-r--r--samples/watch_queue/watch_test.c186
-rw-r--r--samples/watchdog/.gitignore3
-rw-r--r--samples/watchdog/Makefile9
222 files changed, 10124 insertions, 9653 deletions
diff --git a/samples/Kconfig b/samples/Kconfig
index 9d236c346de5..0d81c00289ee 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -6,11 +6,21 @@ menuconfig SAMPLES
if SAMPLES
+config SAMPLE_AUXDISPLAY
+ bool "auxdisplay sample"
+ depends on CC_CAN_LINK
+
config SAMPLE_TRACE_EVENTS
tristate "Build trace_events examples -- loadable modules only"
depends on EVENT_TRACING && m
help
- This build trace event example modules.
+ This builds the trace event example module.
+
+config SAMPLE_TRACE_CUSTOM_EVENTS
+ tristate "Build custom trace event example -- loadable modules only"
+ depends on EVENT_TRACING && m
+ help
+ This builds the custom trace event example module.
config SAMPLE_TRACE_PRINTK
tristate "Build trace_printk module - tests various trace_printk formats"
@@ -22,11 +32,20 @@ config SAMPLE_TRACE_PRINTK
config SAMPLE_FTRACE_DIRECT
tristate "Build register_ftrace_direct() example"
depends on DYNAMIC_FTRACE_WITH_DIRECT_CALLS && m
- depends on X86_64 # has x86_64 inlined asm
+ depends on HAVE_SAMPLE_FTRACE_DIRECT
help
This builds an ftrace direct function example
that hooks to wake_up_process and prints the parameters.
+config SAMPLE_FTRACE_DIRECT_MULTI
+ tristate "Build register_ftrace_direct_multi() example"
+ depends on DYNAMIC_FTRACE_WITH_DIRECT_CALLS && m
+ depends on HAVE_SAMPLE_FTRACE_DIRECT_MULTI
+ help
+ This builds an ftrace direct function example
+ that hooks to wake_up_process and schedule, and prints
+ the function addresses.
+
config SAMPLE_TRACE_ARRAY
tristate "Build sample module for kernel access to Ftrace instancess"
depends on EVENT_TRACING && m
@@ -60,6 +79,13 @@ config SAMPLE_HW_BREAKPOINT
help
This builds kernel hardware breakpoint example modules.
+config SAMPLE_FPROBE
+ tristate "Build fprobe examples -- loadable modules only"
+ depends on FPROBE && m
+ help
+ This builds a fprobe example module. This module has an option 'symbol'.
+ You can specify a probed symbol or symbols separated with ','.
+
config SAMPLE_KFIFO
tristate "Build kfifo examples -- loadable modules only"
depends on m
@@ -116,31 +142,57 @@ config SAMPLE_CONNECTOR
with it.
See also Documentation/driver-api/connector.rst
+config SAMPLE_FANOTIFY_ERROR
+ bool "Build fanotify error monitoring sample"
+ depends on FANOTIFY && CC_CAN_LINK && HEADERS_INSTALL
+ help
+ When enabled, this builds an example code that uses the
+ FAN_FS_ERROR fanotify mechanism to monitor filesystem
+ errors.
+ See also Documentation/admin-guide/filesystem-monitoring.rst.
+
config SAMPLE_HIDRAW
bool "hidraw sample"
- depends on HEADERS_INSTALL
+ depends on CC_CAN_LINK && HEADERS_INSTALL
+
+config SAMPLE_LANDLOCK
+ bool "Landlock example"
+ depends on CC_CAN_LINK && HEADERS_INSTALL
+ help
+ Build a simple Landlock sandbox manager able to start a process
+ restricted by a user-defined filesystem access control policy.
config SAMPLE_PIDFD
bool "pidfd sample"
- depends on HEADERS_INSTALL
+ depends on CC_CAN_LINK && HEADERS_INSTALL
config SAMPLE_SECCOMP
bool "Build seccomp sample code"
- depends on SECCOMP_FILTER && HEADERS_INSTALL
+ depends on SECCOMP_FILTER && CC_CAN_LINK && HEADERS_INSTALL
help
Build samples of seccomp filters using various methods of
BPF filter construction.
+config SAMPLE_TIMER
+ bool "Timer sample"
+ depends on CC_CAN_LINK && HEADERS_INSTALL
+
+config SAMPLE_UHID
+ bool "UHID sample"
+ depends on CC_CAN_LINK && HEADERS_INSTALL
+ help
+ Build UHID sample program.
+
config SAMPLE_VFIO_MDEV_MTTY
tristate "Build VFIO mtty example mediated device sample code -- loadable modules only"
- depends on VFIO_MDEV_DEVICE && m
+ depends on VFIO_MDEV && m
help
Build a virtual tty sample driver for use as a VFIO
mediated device
config SAMPLE_VFIO_MDEV_MDPY
tristate "Build VFIO mdpy example mediated device sample code -- loadable modules only"
- depends on VFIO_MDEV_DEVICE && m
+ depends on VFIO_MDEV && m
help
Build a virtual display sample driver for use as a VFIO
mediated device. It is a simple framebuffer and supports
@@ -157,7 +209,7 @@ config SAMPLE_VFIO_MDEV_MDPY_FB
config SAMPLE_VFIO_MDEV_MBOCHS
tristate "Build VFIO mdpy example mediated device sample code -- loadable modules only"
- depends on VFIO_MDEV_DEVICE && m
+ depends on VFIO_MDEV && m
select DMA_SHARED_BUFFER
help
Build a virtual display sample driver for use as a VFIO
@@ -171,14 +223,14 @@ config SAMPLE_VFIO_MDEV_MBOCHS
config SAMPLE_ANDROID_BINDERFS
bool "Build Android binderfs example"
- depends on CONFIG_ANDROID_BINDERFS
+ depends on CC_CAN_LINK && HEADERS_INSTALL
help
Builds a sample program to illustrate the use of the Android binderfs
filesystem.
config SAMPLE_VFS
bool "Build example programs that use new VFS system calls"
- depends on HEADERS_INSTALL
+ depends on CC_CAN_LINK && HEADERS_INSTALL
help
Build example userspace programs that use new VFS system calls such
as mount API and statx(). Note that this is restricted to the x86
@@ -187,8 +239,36 @@ config SAMPLE_VFS
config SAMPLE_INTEL_MEI
bool "Build example program working with intel mei driver"
depends on INTEL_MEI
+ depends on CC_CAN_LINK && HEADERS_INSTALL
help
Build a sample program to work with mei device.
+config SAMPLE_WATCHDOG
+ bool "watchdog sample"
+ depends on CC_CAN_LINK
+
+config SAMPLE_WATCH_QUEUE
+ bool "Build example watch_queue notification API consumer"
+ depends on CC_CAN_LINK && HEADERS_INSTALL
+ help
+ Build example userspace program to use the new mount_notify(),
+ sb_notify() syscalls and the KEYCTL_WATCH_KEY keyctl() function.
+
+config SAMPLE_CORESIGHT_SYSCFG
+ tristate "Build example loadable module for CoreSight config"
+ depends on CORESIGHT && m
+ help
+ Build an example loadable module that adds new CoreSight features
+ and configuration using the CoreSight system configuration API.
+ This demonstrates how a user may create their own CoreSight
+ configurations and easily load them into the system at runtime.
+
+source "samples/rust/Kconfig"
endif # SAMPLES
+
+config HAVE_SAMPLE_FTRACE_DIRECT
+ bool
+
+config HAVE_SAMPLE_FTRACE_DIRECT_MULTI
+ bool
diff --git a/samples/Makefile b/samples/Makefile
index f8f847b4f61f..9832ef3f8fcb 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -1,26 +1,38 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for Linux samples code
-OBJECT_FILES_NON_STANDARD := y
-obj-$(CONFIG_SAMPLE_ANDROID_BINDERFS) += binderfs/
+subdir-$(CONFIG_SAMPLE_AUXDISPLAY) += auxdisplay
+subdir-$(CONFIG_SAMPLE_ANDROID_BINDERFS) += binderfs
obj-$(CONFIG_SAMPLE_CONFIGFS) += configfs/
obj-$(CONFIG_SAMPLE_CONNECTOR) += connector/
+obj-$(CONFIG_SAMPLE_FANOTIFY_ERROR) += fanotify/
subdir-$(CONFIG_SAMPLE_HIDRAW) += hidraw
obj-$(CONFIG_SAMPLE_HW_BREAKPOINT) += hw_breakpoint/
obj-$(CONFIG_SAMPLE_KDB) += kdb/
obj-$(CONFIG_SAMPLE_KFIFO) += kfifo/
obj-$(CONFIG_SAMPLE_KOBJECT) += kobject/
obj-$(CONFIG_SAMPLE_KPROBES) += kprobes/
+subdir-$(CONFIG_SAMPLE_LANDLOCK) += landlock
obj-$(CONFIG_SAMPLE_LIVEPATCH) += livepatch/
subdir-$(CONFIG_SAMPLE_PIDFD) += pidfd
obj-$(CONFIG_SAMPLE_QMI_CLIENT) += qmi/
obj-$(CONFIG_SAMPLE_RPMSG_CLIENT) += rpmsg/
subdir-$(CONFIG_SAMPLE_SECCOMP) += seccomp
+subdir-$(CONFIG_SAMPLE_TIMER) += timers
obj-$(CONFIG_SAMPLE_TRACE_EVENTS) += trace_events/
+obj-$(CONFIG_SAMPLE_TRACE_CUSTOM_EVENTS) += trace_events/
obj-$(CONFIG_SAMPLE_TRACE_PRINTK) += trace_printk/
obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace/
+obj-$(CONFIG_SAMPLE_FTRACE_DIRECT_MULTI) += ftrace/
obj-$(CONFIG_SAMPLE_TRACE_ARRAY) += ftrace/
+subdir-$(CONFIG_SAMPLE_UHID) += uhid
obj-$(CONFIG_VIDEO_PCI_SKELETON) += v4l/
obj-y += vfio-mdev/
subdir-$(CONFIG_SAMPLE_VFS) += vfs
obj-$(CONFIG_SAMPLE_INTEL_MEI) += mei/
+subdir-$(CONFIG_SAMPLE_WATCHDOG) += watchdog
+subdir-$(CONFIG_SAMPLE_WATCH_QUEUE) += watch_queue
+obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak/
+obj-$(CONFIG_SAMPLE_CORESIGHT_SYSCFG) += coresight/
+obj-$(CONFIG_SAMPLE_FPROBE) += fprobe/
+obj-$(CONFIG_SAMPLES_RUST) += rust/
diff --git a/samples/acrn/Makefile b/samples/acrn/Makefile
new file mode 100644
index 000000000000..c8e3ed9785e9
--- /dev/null
+++ b/samples/acrn/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+
+.PHONY: vm-sample
+
+vm-sample: vm-sample.o payload.o
+ $(CC) $^ -o $@
+
+payload.o: payload.ld guest16.o
+ $(LD) -T $< -o $@
+
+clean:
+ rm *.o vm-sample
diff --git a/samples/acrn/guest.ld b/samples/acrn/guest.ld
new file mode 100644
index 000000000000..5127c682bd22
--- /dev/null
+++ b/samples/acrn/guest.ld
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+OUTPUT_FORMAT(binary)
+SECTIONS
+{
+ .start : { *(.start) }
+ .text : { *(.text*) }
+ .rodata : { *(.rodata) }
+ .data : { *(.data) }
+}
diff --git a/samples/acrn/payload.ld b/samples/acrn/payload.ld
new file mode 100644
index 000000000000..e8d9a498ad62
--- /dev/null
+++ b/samples/acrn/payload.ld
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+SECTIONS
+{
+ .payload16 0 : {
+ guest16 = .;
+ guest16.o(.text)
+ guest16_end = .;
+ }
+}
diff --git a/samples/acrn/vm-sample.c b/samples/acrn/vm-sample.c
new file mode 100644
index 000000000000..b2dad47a77a0
--- /dev/null
+++ b/samples/acrn/vm-sample.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A sample program to run a User VM on the ACRN hypervisor
+ *
+ * This sample runs in a Service VM, which is a privileged VM of ACRN.
+ * CONFIG_ACRN_HSM need to be enabled in the Service VM.
+ *
+ * Guest VM code in guest16.s will be executed after the VM launched.
+ *
+ * Copyright (C) 2020 Intel Corporation. All rights reserved.
+ */
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <malloc.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <signal.h>
+#include <sys/ioctl.h>
+#include <linux/acrn.h>
+
+#define GUEST_MEMORY_SIZE (1024*1024)
+void *guest_memory;
+
+extern const unsigned char guest16[], guest16_end[];
+static char io_request_page[4096] __attribute__((aligned(4096)));
+static struct acrn_io_request *io_req_buf = (struct acrn_io_request *)io_request_page;
+
+__u16 vcpu_num;
+__u16 vmid;
+/* POST_STANDARD_VM_UUID1, refer to https://github.com/projectacrn/acrn-hypervisor/blob/master/hypervisor/include/common/vm_uuids.h */
+guid_t vm_uuid = GUID_INIT(0x385479d2, 0xd625, 0xe811, 0x86, 0x4e, 0xcb, 0x7a, 0x18, 0xb3, 0x46, 0x43);
+
+int hsm_fd;
+int is_running = 1;
+
+void vm_exit(int sig)
+{
+ sig = sig;
+
+ is_running = 0;
+ ioctl(hsm_fd, ACRN_IOCTL_PAUSE_VM, vmid);
+ ioctl(hsm_fd, ACRN_IOCTL_DESTROY_IOREQ_CLIENT, 0);
+}
+
+int main(int argc, char **argv)
+{
+ int vcpu_id, ret;
+ struct acrn_vm_creation create_vm = {0};
+ struct acrn_vm_memmap ram_map = {0};
+ struct acrn_vcpu_regs regs;
+ struct acrn_io_request *io_req;
+ struct acrn_ioreq_notify __attribute__((aligned(8))) notify;
+
+ argc = argc;
+ argv = argv;
+
+ guest_memory = memalign(4096, GUEST_MEMORY_SIZE);
+ if (!guest_memory) {
+ printf("No enough memory!\n");
+ return -1;
+ }
+ hsm_fd = open("/dev/acrn_hsm", O_RDWR|O_CLOEXEC);
+
+ memcpy(&create_vm.uuid, &vm_uuid, 16);
+ create_vm.ioreq_buf = (__u64)io_req_buf;
+ ret = ioctl(hsm_fd, ACRN_IOCTL_CREATE_VM, &create_vm);
+ printf("Created VM! [%d]\n", ret);
+ vcpu_num = create_vm.vcpu_num;
+ vmid = create_vm.vmid;
+
+ /* setup guest memory */
+ ram_map.type = ACRN_MEMMAP_RAM;
+ ram_map.vma_base = (__u64)guest_memory;
+ ram_map.len = GUEST_MEMORY_SIZE;
+ ram_map.user_vm_pa = 0;
+ ram_map.attr = ACRN_MEM_ACCESS_RWX;
+ ret = ioctl(hsm_fd, ACRN_IOCTL_SET_MEMSEG, &ram_map);
+ printf("Set up VM memory! [%d]\n", ret);
+
+ memcpy(guest_memory, guest16, guest16_end-guest16);
+
+ /* setup vcpu registers */
+ memset(&regs, 0, sizeof(regs));
+ regs.vcpu_id = 0;
+ regs.vcpu_regs.rip = 0;
+
+ /* CR0_ET | CR0_NE */
+ regs.vcpu_regs.cr0 = 0x30U;
+ regs.vcpu_regs.cs_ar = 0x009FU;
+ regs.vcpu_regs.cs_sel = 0xF000U;
+ regs.vcpu_regs.cs_limit = 0xFFFFU;
+ regs.vcpu_regs.cs_base = 0 & 0xFFFF0000UL;
+ regs.vcpu_regs.rip = 0 & 0xFFFFUL;
+
+ ret = ioctl(hsm_fd, ACRN_IOCTL_SET_VCPU_REGS, &regs);
+ printf("Set up VM BSP registers! [%d]\n", ret);
+
+ /* create an ioreq client for this VM */
+ ret = ioctl(hsm_fd, ACRN_IOCTL_CREATE_IOREQ_CLIENT, 0);
+ printf("Created IO request client! [%d]\n", ret);
+
+ /* run vm */
+ ret = ioctl(hsm_fd, ACRN_IOCTL_START_VM, vmid);
+ printf("Start VM! [%d]\n", ret);
+
+ signal(SIGINT, vm_exit);
+ while (is_running) {
+ ret = ioctl(hsm_fd, ACRN_IOCTL_ATTACH_IOREQ_CLIENT, 0);
+
+ for (vcpu_id = 0; vcpu_id < vcpu_num; vcpu_id++) {
+ io_req = &io_req_buf[vcpu_id];
+ if ((__sync_add_and_fetch(&io_req->processed, 0) == ACRN_IOREQ_STATE_PROCESSING)
+ && (!io_req->kernel_handled))
+ if (io_req->type == ACRN_IOREQ_TYPE_PORTIO) {
+ int bytes, port, in;
+
+ port = io_req->reqs.pio_request.address;
+ bytes = io_req->reqs.pio_request.size;
+ in = (io_req->reqs.pio_request.direction == ACRN_IOREQ_DIR_READ);
+ printf("Guest VM %s PIO[%x] with size[%x]\n", in ? "read" : "write", port, bytes);
+
+ notify.vmid = vmid;
+ notify.vcpu = vcpu_id;
+ ioctl(hsm_fd, ACRN_IOCTL_NOTIFY_REQUEST_FINISH, &notify);
+ }
+ }
+ }
+
+ ret = ioctl(hsm_fd, ACRN_IOCTL_DESTROY_VM, NULL);
+ printf("Destroy VM! [%d]\n", ret);
+ close(hsm_fd);
+ free(guest_memory);
+ return 0;
+}
diff --git a/samples/auxdisplay/.gitignore b/samples/auxdisplay/.gitignore
index 7af222860a96..d023816849bd 100644
--- a/samples/auxdisplay/.gitignore
+++ b/samples/auxdisplay/.gitignore
@@ -1 +1,2 @@
-cfag12864b-example
+# SPDX-License-Identifier: GPL-2.0-only
+/cfag12864b-example
diff --git a/samples/auxdisplay/Makefile b/samples/auxdisplay/Makefile
index 0273bab27233..19d5568938c3 100644
--- a/samples/auxdisplay/Makefile
+++ b/samples/auxdisplay/Makefile
@@ -1,10 +1,2 @@
# SPDX-License-Identifier: GPL-2.0
-CC := $(CROSS_COMPILE)gcc
-CFLAGS := -I../../usr/include
-
-PROGS := cfag12864b-example
-
-all: $(PROGS)
-
-clean:
- rm -fr $(PROGS)
+userprogs-always-y += cfag12864b-example
diff --git a/samples/auxdisplay/cfag12864b-example.c b/samples/auxdisplay/cfag12864b-example.c
index bfeab44f81d0..2e3bb7375c99 100644
--- a/samples/auxdisplay/cfag12864b-example.c
+++ b/samples/auxdisplay/cfag12864b-example.c
@@ -4,7 +4,7 @@
* Version: 0.1.0
* Description: cfag12864b LCD userspace example program
*
- * Author: Copyright (C) Miguel Ojeda Sandonis
+ * Author: Copyright (C) Miguel Ojeda <ojeda@kernel.org>
* Date: 2006-10-31
*/
diff --git a/samples/binderfs/.gitignore b/samples/binderfs/.gitignore
new file mode 100644
index 000000000000..8fa415a3640b
--- /dev/null
+++ b/samples/binderfs/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+/binderfs_example
diff --git a/samples/binderfs/Makefile b/samples/binderfs/Makefile
index ea4c93d36256..629e43b9b129 100644
--- a/samples/binderfs/Makefile
+++ b/samples/binderfs/Makefile
@@ -1,2 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_SAMPLE_ANDROID_BINDERFS) += binderfs_example.o
+userprogs-always-y += binderfs_example
+
+userccflags += -I usr/include
diff --git a/samples/binderfs/binderfs_example.c b/samples/binderfs/binderfs_example.c
index 5bbd2ebc0aea..0fd92cdda460 100644
--- a/samples/binderfs/binderfs_example.c
+++ b/samples/binderfs/binderfs_example.c
@@ -18,7 +18,6 @@
int main(int argc, char *argv[])
{
int fd, ret, saved_errno;
- size_t len;
struct binderfs_device device = { 0 };
ret = unshare(CLONE_NEWNS);
diff --git a/samples/bpf/.gitignore b/samples/bpf/.gitignore
index 74d31fd3c99c..0e7bfdbff80a 100644
--- a/samples/bpf/.gitignore
+++ b/samples/bpf/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
cpustat
fds_example
hbm
@@ -44,8 +45,19 @@ xdp_monitor
xdp_redirect
xdp_redirect_cpu
xdp_redirect_map
+xdp_redirect_map_multi
xdp_router_ipv4
xdp_rxq_info
xdp_sample_pkts
xdp_tx_iptunnel
xdpsock
+xdpsock_ctrl_proc
+xsk_fwd
+testfile.img
+hbm_out.log
+iperf.*
+*.out
+*.skel.h
+/vmlinux.h
+/bpftool/
+/libbpf/
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 79b0fee6943b..727da3c5879b 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -3,6 +3,8 @@
BPF_SAMPLES_PATH ?= $(abspath $(srctree)/$(src))
TOOLS_PATH := $(BPF_SAMPLES_PATH)/../../tools
+pound := \#
+
# List of programs to build
tprogs-y := test_lru_dist
tprogs-y += sock_example
@@ -39,76 +41,83 @@ tprogs-y += lwt_len_hist
tprogs-y += xdp_tx_iptunnel
tprogs-y += test_map_in_map
tprogs-y += per_socket_stats_example
-tprogs-y += xdp_redirect
-tprogs-y += xdp_redirect_map
-tprogs-y += xdp_redirect_cpu
-tprogs-y += xdp_monitor
tprogs-y += xdp_rxq_info
tprogs-y += syscall_tp
tprogs-y += cpustat
tprogs-y += xdp_adjust_tail
-tprogs-y += xdpsock
tprogs-y += xdp_fwd
tprogs-y += task_fd_query
tprogs-y += xdp_sample_pkts
tprogs-y += ibumad
tprogs-y += hbm
+tprogs-y += xdp_redirect_cpu
+tprogs-y += xdp_redirect_map_multi
+tprogs-y += xdp_redirect_map
+tprogs-y += xdp_redirect
+tprogs-y += xdp_monitor
+
# Libbpf dependencies
-LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
+LIBBPF_SRC = $(TOOLS_PATH)/lib/bpf
+LIBBPF_OUTPUT = $(abspath $(BPF_SAMPLES_PATH))/libbpf
+LIBBPF_DESTDIR = $(LIBBPF_OUTPUT)
+LIBBPF_INCLUDE = $(LIBBPF_DESTDIR)/include
+LIBBPF = $(LIBBPF_OUTPUT)/libbpf.a
CGROUP_HELPERS := ../../tools/testing/selftests/bpf/cgroup_helpers.o
TRACE_HELPERS := ../../tools/testing/selftests/bpf/trace_helpers.o
+XDP_SAMPLE := xdp_sample_user.o
fds_example-objs := fds_example.o
sockex1-objs := sockex1_user.o
sockex2-objs := sockex2_user.o
-sockex3-objs := bpf_load.o sockex3_user.o
-tracex1-objs := bpf_load.o tracex1_user.o
-tracex2-objs := bpf_load.o tracex2_user.o
-tracex3-objs := bpf_load.o tracex3_user.o
-tracex4-objs := bpf_load.o tracex4_user.o
-tracex5-objs := bpf_load.o tracex5_user.o
-tracex6-objs := bpf_load.o tracex6_user.o
-tracex7-objs := bpf_load.o tracex7_user.o
-test_probe_write_user-objs := bpf_load.o test_probe_write_user_user.o
-trace_output-objs := bpf_load.o trace_output_user.o $(TRACE_HELPERS)
-lathist-objs := bpf_load.o lathist_user.o
-offwaketime-objs := bpf_load.o offwaketime_user.o $(TRACE_HELPERS)
-spintest-objs := bpf_load.o spintest_user.o $(TRACE_HELPERS)
-map_perf_test-objs := bpf_load.o map_perf_test_user.o
-test_overhead-objs := bpf_load.o test_overhead_user.o
+sockex3-objs := sockex3_user.o
+tracex1-objs := tracex1_user.o $(TRACE_HELPERS)
+tracex2-objs := tracex2_user.o
+tracex3-objs := tracex3_user.o
+tracex4-objs := tracex4_user.o
+tracex5-objs := tracex5_user.o $(TRACE_HELPERS)
+tracex6-objs := tracex6_user.o
+tracex7-objs := tracex7_user.o
+test_probe_write_user-objs := test_probe_write_user_user.o
+trace_output-objs := trace_output_user.o
+lathist-objs := lathist_user.o
+offwaketime-objs := offwaketime_user.o $(TRACE_HELPERS)
+spintest-objs := spintest_user.o $(TRACE_HELPERS)
+map_perf_test-objs := map_perf_test_user.o
+test_overhead-objs := test_overhead_user.o
test_cgrp2_array_pin-objs := test_cgrp2_array_pin.o
test_cgrp2_attach-objs := test_cgrp2_attach.o
test_cgrp2_sock-objs := test_cgrp2_sock.o
-test_cgrp2_sock2-objs := bpf_load.o test_cgrp2_sock2.o
+test_cgrp2_sock2-objs := test_cgrp2_sock2.o
xdp1-objs := xdp1_user.o
# reuse xdp1 source intentionally
xdp2-objs := xdp1_user.o
-xdp_router_ipv4-objs := xdp_router_ipv4_user.o
-test_current_task_under_cgroup-objs := bpf_load.o $(CGROUP_HELPERS) \
+test_current_task_under_cgroup-objs := $(CGROUP_HELPERS) \
test_current_task_under_cgroup_user.o
-trace_event-objs := bpf_load.o trace_event_user.o $(TRACE_HELPERS)
-sampleip-objs := bpf_load.o sampleip_user.o $(TRACE_HELPERS)
-tc_l2_redirect-objs := bpf_load.o tc_l2_redirect_user.o
-lwt_len_hist-objs := bpf_load.o lwt_len_hist_user.o
+trace_event-objs := trace_event_user.o $(TRACE_HELPERS)
+sampleip-objs := sampleip_user.o $(TRACE_HELPERS)
+tc_l2_redirect-objs := tc_l2_redirect_user.o
+lwt_len_hist-objs := lwt_len_hist_user.o
xdp_tx_iptunnel-objs := xdp_tx_iptunnel_user.o
-test_map_in_map-objs := bpf_load.o test_map_in_map_user.o
+test_map_in_map-objs := test_map_in_map_user.o
per_socket_stats_example-objs := cookie_uid_helper_example.o
-xdp_redirect-objs := xdp_redirect_user.o
-xdp_redirect_map-objs := xdp_redirect_map_user.o
-xdp_redirect_cpu-objs := bpf_load.o xdp_redirect_cpu_user.o
-xdp_monitor-objs := bpf_load.o xdp_monitor_user.o
xdp_rxq_info-objs := xdp_rxq_info_user.o
-syscall_tp-objs := bpf_load.o syscall_tp_user.o
-cpustat-objs := bpf_load.o cpustat_user.o
+syscall_tp-objs := syscall_tp_user.o
+cpustat-objs := cpustat_user.o
xdp_adjust_tail-objs := xdp_adjust_tail_user.o
-xdpsock-objs := xdpsock_user.o
xdp_fwd-objs := xdp_fwd_user.o
-task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
-xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS)
-ibumad-objs := bpf_load.o ibumad_user.o $(TRACE_HELPERS)
-hbm-objs := bpf_load.o hbm.o $(CGROUP_HELPERS)
+task_fd_query-objs := task_fd_query_user.o $(TRACE_HELPERS)
+xdp_sample_pkts-objs := xdp_sample_pkts_user.o
+ibumad-objs := ibumad_user.o
+hbm-objs := hbm.o $(CGROUP_HELPERS)
+
+xdp_redirect_map_multi-objs := xdp_redirect_map_multi_user.o $(XDP_SAMPLE)
+xdp_redirect_cpu-objs := xdp_redirect_cpu_user.o $(XDP_SAMPLE)
+xdp_redirect_map-objs := xdp_redirect_map_user.o $(XDP_SAMPLE)
+xdp_redirect-objs := xdp_redirect_user.o $(XDP_SAMPLE)
+xdp_monitor-objs := xdp_monitor_user.o $(XDP_SAMPLE)
+xdp_router_ipv4-objs := xdp_router_ipv4_user.o $(XDP_SAMPLE)
# Tell kbuild to always build the programs
always-y := $(tprogs-y)
@@ -138,7 +147,6 @@ always-y += parse_varlen.o parse_simple.o parse_ldabs.o
always-y += test_cgrp2_tc_kern.o
always-y += xdp1_kern.o
always-y += xdp2_kern.o
-always-y += xdp_router_ipv4_kern.o
always-y += test_current_task_under_cgroup_kern.o
always-y += trace_event_kern.o
always-y += sampleip_kern.o
@@ -154,10 +162,6 @@ always-y += tcp_clamp_kern.o
always-y += tcp_basertt_kern.o
always-y += tcp_tos_reflect_kern.o
always-y += tcp_dumpstats_kern.o
-always-y += xdp_redirect_kern.o
-always-y += xdp_redirect_map_kern.o
-always-y += xdp_redirect_cpu_kern.o
-always-y += xdp_monitor_kern.o
always-y += xdp_rxq_info_kern.o
always-y += xdp2skb_meta_kern.o
always-y += syscall_tp_kern.o
@@ -169,7 +173,6 @@ always-y += xdp_sample_pkts_kern.o
always-y += ibumad_kern.o
always-y += hbm_out_kern.o
always-y += hbm_edt_kern.o
-always-y += xdpsock_kern.o
ifeq ($(ARCH), arm)
# Strip all except -D__LINUX_ARM_ARCH__ option needed to handle linux
@@ -179,13 +182,21 @@ BPF_EXTRA_CFLAGS := $(ARM_ARCH_SELECTOR)
TPROGS_CFLAGS += $(ARM_ARCH_SELECTOR)
endif
+ifeq ($(ARCH), mips)
+TPROGS_CFLAGS += -D__SANE_USERSPACE_TYPES__
+ifdef CONFIG_MACH_LOONGSON64
+BPF_EXTRA_CFLAGS += -I$(srctree)/arch/mips/include/asm/mach-loongson64
+BPF_EXTRA_CFLAGS += -I$(srctree)/arch/mips/include/asm/mach-generic
+endif
+endif
+
TPROGS_CFLAGS += -Wall -O2
TPROGS_CFLAGS += -Wmissing-prototypes
TPROGS_CFLAGS += -Wstrict-prototypes
TPROGS_CFLAGS += -I$(objtree)/usr/include
TPROGS_CFLAGS += -I$(srctree)/tools/testing/selftests/bpf/
-TPROGS_CFLAGS += -I$(srctree)/tools/lib/
+TPROGS_CFLAGS += -I$(LIBBPF_INCLUDE)
TPROGS_CFLAGS += -I$(srctree)/tools/include
TPROGS_CFLAGS += -I$(srctree)/tools/perf
TPROGS_CFLAGS += -DHAVE_ATTR_TEST=0
@@ -195,20 +206,26 @@ TPROGS_CFLAGS += --sysroot=$(SYSROOT)
TPROGS_LDFLAGS := -L$(SYSROOT)/usr/lib
endif
-TPROGCFLAGS_bpf_load.o += -Wno-unused-variable
-
TPROGS_LDLIBS += $(LIBBPF) -lelf -lz
+TPROGLDLIBS_xdp_monitor += -lm
+TPROGLDLIBS_xdp_redirect += -lm
+TPROGLDLIBS_xdp_redirect_cpu += -lm
+TPROGLDLIBS_xdp_redirect_map += -lm
+TPROGLDLIBS_xdp_redirect_map_multi += -lm
+TPROGLDLIBS_xdp_router_ipv4 += -lm -pthread
TPROGLDLIBS_tracex4 += -lrt
TPROGLDLIBS_trace_output += -lrt
TPROGLDLIBS_map_perf_test += -lrt
TPROGLDLIBS_test_overhead += -lrt
-TPROGLDLIBS_xdpsock += -pthread
# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
-# make M=samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
+# make M=samples/bpf LLC=~/git/llvm-project/llvm/build/bin/llc CLANG=~/git/llvm-project/llvm/build/bin/clang
LLC ?= llc
CLANG ?= clang
+OPT ?= opt
+LLVM_DIS ?= llvm-dis
LLVM_OBJCOPY ?= llvm-objcopy
+LLVM_READELF ?= llvm-readelf
BTF_PAHOLE ?= pahole
# Detect that we're cross compiling and use the cross compiler
@@ -218,7 +235,7 @@ endif
# Don't evaluate probes and warnings if we need to run make recursively
ifneq ($(src),)
-HDR_PROBE := $(shell printf "\#include <linux/types.h>\n struct list_head { int a; }; int main() { return 0; }" | \
+HDR_PROBE := $(shell printf "$(pound)include <linux/types.h>\n struct list_head { int a; }; int main() { return 0; }" | \
$(CC) $(TPROGS_CFLAGS) $(TPROGS_LDFLAGS) -x c - \
-o /dev/null 2>/dev/null && echo okay)
@@ -232,7 +249,7 @@ BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm')
BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \
$(CLANG) -target bpf -O2 -g -c -x c - -o ./llvm_btf_verify.o; \
- readelf -S ./llvm_btf_verify.o | grep BTF; \
+ $(LLVM_READELF) -S ./llvm_btf_verify.o | grep BTF; \
/bin/rm -f ./llvm_btf_verify.o)
BPF_EXTRA_CFLAGS += -fno-stack-protector
@@ -254,11 +271,25 @@ all:
clean:
$(MAKE) -C ../../ M=$(CURDIR) clean
@find $(CURDIR) -type f -name '*~' -delete
+ @$(RM) -r $(CURDIR)/libbpf $(CURDIR)/bpftool
-$(LIBBPF): FORCE
+$(LIBBPF): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUTPUT)
# Fix up variables inherited from Kbuild that tools/ build system won't like
- $(MAKE) -C $(dir $@) RM='rm -rf' EXTRA_CFLAGS="$(TPROGS_CFLAGS)" \
- LDFLAGS=$(TPROGS_LDFLAGS) srctree=$(BPF_SAMPLES_PATH)/../../ O=
+ $(MAKE) -C $(LIBBPF_SRC) RM='rm -rf' EXTRA_CFLAGS="$(TPROGS_CFLAGS)" \
+ LDFLAGS=$(TPROGS_LDFLAGS) srctree=$(BPF_SAMPLES_PATH)/../../ \
+ O= OUTPUT=$(LIBBPF_OUTPUT)/ DESTDIR=$(LIBBPF_DESTDIR) prefix= \
+ $@ install_headers
+
+BPFTOOLDIR := $(TOOLS_PATH)/bpf/bpftool
+BPFTOOL_OUTPUT := $(abspath $(BPF_SAMPLES_PATH))/bpftool
+BPFTOOL := $(BPFTOOL_OUTPUT)/bootstrap/bpftool
+$(BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) | $(BPFTOOL_OUTPUT)
+ $(MAKE) -C $(BPFTOOLDIR) srctree=$(BPF_SAMPLES_PATH)/../../ \
+ OUTPUT=$(BPFTOOL_OUTPUT)/ bootstrap
+
+$(LIBBPF_OUTPUT) $(BPFTOOL_OUTPUT):
+ $(call msg,MKDIR,$@)
+ $(Q)mkdir -p $@
$(obj)/syscall_nrs.h: $(obj)/syscall_nrs.s FORCE
$(call filechk,offsets,__SYSCALL_NRS_H__)
@@ -290,28 +321,130 @@ verify_target_bpf: verify_cmds
$(BPF_SAMPLES_PATH)/*.c: verify_target_bpf $(LIBBPF)
$(src)/*.c: verify_target_bpf $(LIBBPF)
+libbpf_hdrs: $(LIBBPF)
+$(obj)/$(TRACE_HELPERS) $(obj)/$(CGROUP_HELPERS) $(obj)/$(XDP_SAMPLE): | libbpf_hdrs
+
+.PHONY: libbpf_hdrs
+
+$(obj)/xdp_redirect_cpu_user.o: $(obj)/xdp_redirect_cpu.skel.h
+$(obj)/xdp_redirect_map_multi_user.o: $(obj)/xdp_redirect_map_multi.skel.h
+$(obj)/xdp_redirect_map_user.o: $(obj)/xdp_redirect_map.skel.h
+$(obj)/xdp_redirect_user.o: $(obj)/xdp_redirect.skel.h
+$(obj)/xdp_monitor_user.o: $(obj)/xdp_monitor.skel.h
+$(obj)/xdp_router_ipv4_user.o: $(obj)/xdp_router_ipv4.skel.h
+
$(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h
$(obj)/hbm_out_kern.o: $(src)/hbm.h $(src)/hbm_kern.h
$(obj)/hbm.o: $(src)/hbm.h
$(obj)/hbm_edt_kern.o: $(src)/hbm.h $(src)/hbm_kern.h
+# Override includes for xdp_sample_user.o because $(srctree)/usr/include in
+# TPROGS_CFLAGS causes conflicts
+XDP_SAMPLE_CFLAGS += -Wall -O2 \
+ -I$(src)/../../tools/include \
+ -I$(src)/../../tools/include/uapi \
+ -I$(LIBBPF_INCLUDE) \
+ -I$(src)/../../tools/testing/selftests/bpf
+
+$(obj)/$(XDP_SAMPLE): TPROGS_CFLAGS = $(XDP_SAMPLE_CFLAGS)
+$(obj)/$(XDP_SAMPLE): $(src)/xdp_sample_user.h $(src)/xdp_sample_shared.h
+
-include $(BPF_SAMPLES_PATH)/Makefile.target
+VMLINUX_BTF_PATHS ?= $(abspath $(if $(O),$(O)/vmlinux)) \
+ $(abspath $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux)) \
+ $(abspath ./vmlinux)
+VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
+
+$(obj)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL)
+ifeq ($(VMLINUX_H),)
+ifeq ($(VMLINUX_BTF),)
+ $(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)",\
+ build the kernel or set VMLINUX_BTF or VMLINUX_H variable)
+endif
+ $(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@
+else
+ $(Q)cp "$(VMLINUX_H)" $@
+endif
+
+clean-files += vmlinux.h
+
+# Get Clang's default includes on this system, as opposed to those seen by
+# '-target bpf'. This fixes "missing" files on some architectures/distros,
+# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
+#
+# Use '-idirafter': Don't interfere with include mechanics except where the
+# build would have failed anyways.
+define get_sys_includes
+$(shell $(1) -v -E - </dev/null 2>&1 \
+ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \
+$(shell $(1) -dM -E - </dev/null | grep '#define __riscv_xlen ' | sed 's/#define /-D/' | sed 's/ /=/')
+endef
+
+CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG))
+
+$(obj)/xdp_redirect_cpu.bpf.o: $(obj)/xdp_sample.bpf.o
+$(obj)/xdp_redirect_map_multi.bpf.o: $(obj)/xdp_sample.bpf.o
+$(obj)/xdp_redirect_map.bpf.o: $(obj)/xdp_sample.bpf.o
+$(obj)/xdp_redirect.bpf.o: $(obj)/xdp_sample.bpf.o
+$(obj)/xdp_monitor.bpf.o: $(obj)/xdp_sample.bpf.o
+$(obj)/xdp_router_ipv4.bpf.o: $(obj)/xdp_sample.bpf.o
+
+$(obj)/%.bpf.o: $(src)/%.bpf.c $(obj)/vmlinux.h $(src)/xdp_sample.bpf.h $(src)/xdp_sample_shared.h
+ @echo " CLANG-BPF " $@
+ $(Q)$(CLANG) -g -O2 -target bpf -D__TARGET_ARCH_$(SRCARCH) \
+ -Wno-compare-distinct-pointer-types -I$(srctree)/include \
+ -I$(srctree)/samples/bpf -I$(srctree)/tools/include \
+ -I$(LIBBPF_INCLUDE) $(CLANG_SYS_INCLUDES) \
+ -c $(filter %.bpf.c,$^) -o $@
+
+LINKED_SKELS := xdp_redirect_cpu.skel.h xdp_redirect_map_multi.skel.h \
+ xdp_redirect_map.skel.h xdp_redirect.skel.h xdp_monitor.skel.h \
+ xdp_router_ipv4.skel.h
+clean-files += $(LINKED_SKELS)
+
+xdp_redirect_cpu.skel.h-deps := xdp_redirect_cpu.bpf.o xdp_sample.bpf.o
+xdp_redirect_map_multi.skel.h-deps := xdp_redirect_map_multi.bpf.o xdp_sample.bpf.o
+xdp_redirect_map.skel.h-deps := xdp_redirect_map.bpf.o xdp_sample.bpf.o
+xdp_redirect.skel.h-deps := xdp_redirect.bpf.o xdp_sample.bpf.o
+xdp_monitor.skel.h-deps := xdp_monitor.bpf.o xdp_sample.bpf.o
+xdp_router_ipv4.skel.h-deps := xdp_router_ipv4.bpf.o xdp_sample.bpf.o
+
+LINKED_BPF_SRCS := $(patsubst %.bpf.o,%.bpf.c,$(foreach skel,$(LINKED_SKELS),$($(skel)-deps)))
+
+BPF_SRCS_LINKED := $(notdir $(wildcard $(src)/*.bpf.c))
+BPF_OBJS_LINKED := $(patsubst %.bpf.c,$(obj)/%.bpf.o, $(BPF_SRCS_LINKED))
+BPF_SKELS_LINKED := $(addprefix $(obj)/,$(LINKED_SKELS))
+
+$(BPF_SKELS_LINKED): $(BPF_OBJS_LINKED) $(BPFTOOL)
+ @echo " BPF GEN-OBJ " $(@:.skel.h=)
+ $(Q)$(BPFTOOL) gen object $(@:.skel.h=.lbpf.o) $(addprefix $(obj)/,$($(@F)-deps))
+ @echo " BPF GEN-SKEL" $(@:.skel.h=)
+ $(Q)$(BPFTOOL) gen skeleton $(@:.skel.h=.lbpf.o) name $(notdir $(@:.skel.h=)) > $@
+
# asm/sysreg.h - inline assembly used by it is incompatible with llvm.
# But, there is no easy way to fix it, so just exclude it since it is
# useless for BPF samples.
+# below we use long chain of commands, clang | opt | llvm-dis | llc,
+# to generate final object file. 'clang' compiles the source into IR
+# with native target, e.g., x64, arm64, etc. 'opt' does bpf CORE IR builtin
+# processing (llvm12) and IR optimizations. 'llvm-dis' converts
+# 'opt' output to IR, and finally 'llc' generates bpf byte code.
$(obj)/%.o: $(src)/%.c
@echo " CLANG-bpf " $@
$(Q)$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(BPF_EXTRA_CFLAGS) \
-I$(obj) -I$(srctree)/tools/testing/selftests/bpf/ \
- -I$(srctree)/tools/lib/ \
+ -I$(LIBBPF_INCLUDE) \
-D__KERNEL__ -D__BPF_TRACING__ -Wno-unused-value -Wno-pointer-sign \
-D__TARGET_ARCH_$(SRCARCH) -Wno-compare-distinct-pointer-types \
-Wno-gnu-variable-sized-type-not-at-end \
-Wno-address-of-packed-member -Wno-tautological-compare \
-Wno-unknown-warning-option $(CLANG_ARCH_ARGS) \
+ -fno-asynchronous-unwind-tables \
-I$(srctree)/samples/bpf/ -include asm_goto_workaround.h \
- -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf $(LLC_FLAGS) -filetype=obj -o $@
+ -O2 -emit-llvm -Xclang -disable-llvm-passes -c $< -o - | \
+ $(OPT) -O2 -mtriple=bpf-pc-linux | $(LLVM_DIS) | \
+ $(LLC) -march=bpf $(LLC_FLAGS) -filetype=obj -o $@
ifeq ($(DWARF2BTF),y)
$(BTF_PAHOLE) -J $@
endif
diff --git a/samples/bpf/README.rst b/samples/bpf/README.rst
index dd34b2d26f1c..60c6494adb1b 100644
--- a/samples/bpf/README.rst
+++ b/samples/bpf/README.rst
@@ -62,20 +62,26 @@ To generate a smaller llc binary one can use::
-DLLVM_TARGETS_TO_BUILD="BPF"
+We recommend that developers who want the fastest incremental builds
+use the Ninja build system, you can find it in your system's package
+manager, usually the package is ninja or ninja-build.
+
Quick sniplet for manually compiling LLVM and clang
-(build dependencies are cmake and gcc-c++)::
+(build dependencies are ninja, cmake and gcc-c++)::
- $ git clone http://llvm.org/git/llvm.git
- $ cd llvm/tools
- $ git clone --depth 1 http://llvm.org/git/clang.git
- $ cd ..; mkdir build; cd build
- $ cmake .. -DLLVM_TARGETS_TO_BUILD="BPF;X86"
- $ make -j $(getconf _NPROCESSORS_ONLN)
+ $ git clone https://github.com/llvm/llvm-project.git
+ $ mkdir -p llvm-project/llvm/build
+ $ cd llvm-project/llvm/build
+ $ cmake .. -G "Ninja" -DLLVM_TARGETS_TO_BUILD="BPF;X86" \
+ -DLLVM_ENABLE_PROJECTS="clang" \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DLLVM_BUILD_RUNTIME=OFF
+ $ ninja
It is also possible to point make to the newly compiled 'llc' or
'clang' command via redefining LLC or CLANG on the make command line::
- make M=samples/bpf LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
+ make M=samples/bpf LLC=~/git/llvm-project/llvm/build/bin/llc CLANG=~/git/llvm-project/llvm/build/bin/clang
Cross compiling samples
-----------------------
diff --git a/samples/bpf/bpf_insn.h b/samples/bpf/bpf_insn.h
index 544237980582..29c3bb6ad1cd 100644
--- a/samples/bpf/bpf_insn.h
+++ b/samples/bpf/bpf_insn.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
/* eBPF instruction mini library */
#ifndef __BPF_INSN_H
#define __BPF_INSN_H
@@ -134,15 +134,31 @@ struct bpf_insn;
.off = OFF, \
.imm = 0 })
-/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */
-
-#define BPF_STX_XADD(SIZE, DST, SRC, OFF) \
- ((struct bpf_insn) { \
- .code = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD, \
+/*
+ * Atomic operations:
+ *
+ * BPF_ADD *(uint *) (dst_reg + off16) += src_reg
+ * BPF_AND *(uint *) (dst_reg + off16) &= src_reg
+ * BPF_OR *(uint *) (dst_reg + off16) |= src_reg
+ * BPF_XOR *(uint *) (dst_reg + off16) ^= src_reg
+ * BPF_ADD | BPF_FETCH src_reg = atomic_fetch_add(dst_reg + off16, src_reg);
+ * BPF_AND | BPF_FETCH src_reg = atomic_fetch_and(dst_reg + off16, src_reg);
+ * BPF_OR | BPF_FETCH src_reg = atomic_fetch_or(dst_reg + off16, src_reg);
+ * BPF_XOR | BPF_FETCH src_reg = atomic_fetch_xor(dst_reg + off16, src_reg);
+ * BPF_XCHG src_reg = atomic_xchg(dst_reg + off16, src_reg)
+ * BPF_CMPXCHG r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg)
+ */
+
+#define BPF_ATOMIC_OP(SIZE, OP, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC, \
.dst_reg = DST, \
.src_reg = SRC, \
.off = OFF, \
- .imm = 0 })
+ .imm = OP })
+
+/* Legacy alias */
+#define BPF_STX_XADD(SIZE, DST, SRC, OFF) BPF_ATOMIC_OP(SIZE, BPF_ADD, DST, SRC, OFF)
/* Memory store, *(uint *) (dst_reg + off16) = imm32 */
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
deleted file mode 100644
index 4574b1939e49..000000000000
--- a/samples/bpf/bpf_load.c
+++ /dev/null
@@ -1,687 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <stdio.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <libelf.h>
-#include <gelf.h>
-#include <errno.h>
-#include <unistd.h>
-#include <string.h>
-#include <stdbool.h>
-#include <stdlib.h>
-#include <linux/bpf.h>
-#include <linux/filter.h>
-#include <linux/perf_event.h>
-#include <linux/netlink.h>
-#include <linux/rtnetlink.h>
-#include <linux/types.h>
-#include <sys/socket.h>
-#include <sys/syscall.h>
-#include <sys/ioctl.h>
-#include <sys/mman.h>
-#include <poll.h>
-#include <ctype.h>
-#include <assert.h>
-#include <bpf/bpf.h>
-#include "bpf_load.h"
-#include "perf-sys.h"
-
-#define DEBUGFS "/sys/kernel/debug/tracing/"
-
-static char license[128];
-static int kern_version;
-static bool processed_sec[128];
-char bpf_log_buf[BPF_LOG_BUF_SIZE];
-int map_fd[MAX_MAPS];
-int prog_fd[MAX_PROGS];
-int event_fd[MAX_PROGS];
-int prog_cnt;
-int prog_array_fd = -1;
-
-struct bpf_map_data map_data[MAX_MAPS];
-int map_data_count;
-
-static int populate_prog_array(const char *event, int prog_fd)
-{
- int ind = atoi(event), err;
-
- err = bpf_map_update_elem(prog_array_fd, &ind, &prog_fd, BPF_ANY);
- if (err < 0) {
- printf("failed to store prog_fd in prog_array\n");
- return -1;
- }
- return 0;
-}
-
-static int write_kprobe_events(const char *val)
-{
- int fd, ret, flags;
-
- if (val == NULL)
- return -1;
- else if (val[0] == '\0')
- flags = O_WRONLY | O_TRUNC;
- else
- flags = O_WRONLY | O_APPEND;
-
- fd = open(DEBUGFS "kprobe_events", flags);
-
- ret = write(fd, val, strlen(val));
- close(fd);
-
- return ret;
-}
-
-static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
-{
- bool is_socket = strncmp(event, "socket", 6) == 0;
- bool is_kprobe = strncmp(event, "kprobe/", 7) == 0;
- bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0;
- bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0;
- bool is_raw_tracepoint = strncmp(event, "raw_tracepoint/", 15) == 0;
- bool is_xdp = strncmp(event, "xdp", 3) == 0;
- bool is_perf_event = strncmp(event, "perf_event", 10) == 0;
- bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0;
- bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0;
- bool is_sockops = strncmp(event, "sockops", 7) == 0;
- bool is_sk_skb = strncmp(event, "sk_skb", 6) == 0;
- bool is_sk_msg = strncmp(event, "sk_msg", 6) == 0;
- size_t insns_cnt = size / sizeof(struct bpf_insn);
- enum bpf_prog_type prog_type;
- char buf[256];
- int fd, efd, err, id;
- struct perf_event_attr attr = {};
-
- attr.type = PERF_TYPE_TRACEPOINT;
- attr.sample_type = PERF_SAMPLE_RAW;
- attr.sample_period = 1;
- attr.wakeup_events = 1;
-
- if (is_socket) {
- prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
- } else if (is_kprobe || is_kretprobe) {
- prog_type = BPF_PROG_TYPE_KPROBE;
- } else if (is_tracepoint) {
- prog_type = BPF_PROG_TYPE_TRACEPOINT;
- } else if (is_raw_tracepoint) {
- prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT;
- } else if (is_xdp) {
- prog_type = BPF_PROG_TYPE_XDP;
- } else if (is_perf_event) {
- prog_type = BPF_PROG_TYPE_PERF_EVENT;
- } else if (is_cgroup_skb) {
- prog_type = BPF_PROG_TYPE_CGROUP_SKB;
- } else if (is_cgroup_sk) {
- prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
- } else if (is_sockops) {
- prog_type = BPF_PROG_TYPE_SOCK_OPS;
- } else if (is_sk_skb) {
- prog_type = BPF_PROG_TYPE_SK_SKB;
- } else if (is_sk_msg) {
- prog_type = BPF_PROG_TYPE_SK_MSG;
- } else {
- printf("Unknown event '%s'\n", event);
- return -1;
- }
-
- if (prog_cnt == MAX_PROGS)
- return -1;
-
- fd = bpf_load_program(prog_type, prog, insns_cnt, license, kern_version,
- bpf_log_buf, BPF_LOG_BUF_SIZE);
- if (fd < 0) {
- printf("bpf_load_program() err=%d\n%s", errno, bpf_log_buf);
- return -1;
- }
-
- prog_fd[prog_cnt++] = fd;
-
- if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk)
- return 0;
-
- if (is_socket || is_sockops || is_sk_skb || is_sk_msg) {
- if (is_socket)
- event += 6;
- else
- event += 7;
- if (*event != '/')
- return 0;
- event++;
- if (!isdigit(*event)) {
- printf("invalid prog number\n");
- return -1;
- }
- return populate_prog_array(event, fd);
- }
-
- if (is_raw_tracepoint) {
- efd = bpf_raw_tracepoint_open(event + 15, fd);
- if (efd < 0) {
- printf("tracepoint %s %s\n", event + 15, strerror(errno));
- return -1;
- }
- event_fd[prog_cnt - 1] = efd;
- return 0;
- }
-
- if (is_kprobe || is_kretprobe) {
- bool need_normal_check = true;
- const char *event_prefix = "";
-
- if (is_kprobe)
- event += 7;
- else
- event += 10;
-
- if (*event == 0) {
- printf("event name cannot be empty\n");
- return -1;
- }
-
- if (isdigit(*event))
- return populate_prog_array(event, fd);
-
-#ifdef __x86_64__
- if (strncmp(event, "sys_", 4) == 0) {
- snprintf(buf, sizeof(buf), "%c:__x64_%s __x64_%s",
- is_kprobe ? 'p' : 'r', event, event);
- err = write_kprobe_events(buf);
- if (err >= 0) {
- need_normal_check = false;
- event_prefix = "__x64_";
- }
- }
-#endif
- if (need_normal_check) {
- snprintf(buf, sizeof(buf), "%c:%s %s",
- is_kprobe ? 'p' : 'r', event, event);
- err = write_kprobe_events(buf);
- if (err < 0) {
- printf("failed to create kprobe '%s' error '%s'\n",
- event, strerror(errno));
- return -1;
- }
- }
-
- strcpy(buf, DEBUGFS);
- strcat(buf, "events/kprobes/");
- strcat(buf, event_prefix);
- strcat(buf, event);
- strcat(buf, "/id");
- } else if (is_tracepoint) {
- event += 11;
-
- if (*event == 0) {
- printf("event name cannot be empty\n");
- return -1;
- }
- strcpy(buf, DEBUGFS);
- strcat(buf, "events/");
- strcat(buf, event);
- strcat(buf, "/id");
- }
-
- efd = open(buf, O_RDONLY, 0);
- if (efd < 0) {
- printf("failed to open event %s\n", event);
- return -1;
- }
-
- err = read(efd, buf, sizeof(buf));
- if (err < 0 || err >= sizeof(buf)) {
- printf("read from '%s' failed '%s'\n", event, strerror(errno));
- return -1;
- }
-
- close(efd);
-
- buf[err] = 0;
- id = atoi(buf);
- attr.config = id;
-
- efd = sys_perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0);
- if (efd < 0) {
- printf("event %d fd %d err %s\n", id, efd, strerror(errno));
- return -1;
- }
- event_fd[prog_cnt - 1] = efd;
- err = ioctl(efd, PERF_EVENT_IOC_ENABLE, 0);
- if (err < 0) {
- printf("ioctl PERF_EVENT_IOC_ENABLE failed err %s\n",
- strerror(errno));
- return -1;
- }
- err = ioctl(efd, PERF_EVENT_IOC_SET_BPF, fd);
- if (err < 0) {
- printf("ioctl PERF_EVENT_IOC_SET_BPF failed err %s\n",
- strerror(errno));
- return -1;
- }
-
- return 0;
-}
-
-static int load_maps(struct bpf_map_data *maps, int nr_maps,
- fixup_map_cb fixup_map)
-{
- int i, numa_node;
-
- for (i = 0; i < nr_maps; i++) {
- if (fixup_map) {
- fixup_map(&maps[i], i);
- /* Allow userspace to assign map FD prior to creation */
- if (maps[i].fd != -1) {
- map_fd[i] = maps[i].fd;
- continue;
- }
- }
-
- numa_node = maps[i].def.map_flags & BPF_F_NUMA_NODE ?
- maps[i].def.numa_node : -1;
-
- if (maps[i].def.type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
- maps[i].def.type == BPF_MAP_TYPE_HASH_OF_MAPS) {
- int inner_map_fd = map_fd[maps[i].def.inner_map_idx];
-
- map_fd[i] = bpf_create_map_in_map_node(maps[i].def.type,
- maps[i].name,
- maps[i].def.key_size,
- inner_map_fd,
- maps[i].def.max_entries,
- maps[i].def.map_flags,
- numa_node);
- } else {
- map_fd[i] = bpf_create_map_node(maps[i].def.type,
- maps[i].name,
- maps[i].def.key_size,
- maps[i].def.value_size,
- maps[i].def.max_entries,
- maps[i].def.map_flags,
- numa_node);
- }
- if (map_fd[i] < 0) {
- printf("failed to create map %d (%s): %d %s\n",
- i, maps[i].name, errno, strerror(errno));
- return 1;
- }
- maps[i].fd = map_fd[i];
-
- if (maps[i].def.type == BPF_MAP_TYPE_PROG_ARRAY)
- prog_array_fd = map_fd[i];
- }
- return 0;
-}
-
-static int get_sec(Elf *elf, int i, GElf_Ehdr *ehdr, char **shname,
- GElf_Shdr *shdr, Elf_Data **data)
-{
- Elf_Scn *scn;
-
- scn = elf_getscn(elf, i);
- if (!scn)
- return 1;
-
- if (gelf_getshdr(scn, shdr) != shdr)
- return 2;
-
- *shname = elf_strptr(elf, ehdr->e_shstrndx, shdr->sh_name);
- if (!*shname || !shdr->sh_size)
- return 3;
-
- *data = elf_getdata(scn, 0);
- if (!*data || elf_getdata(scn, *data) != NULL)
- return 4;
-
- return 0;
-}
-
-static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols,
- GElf_Shdr *shdr, struct bpf_insn *insn,
- struct bpf_map_data *maps, int nr_maps)
-{
- int i, nrels;
-
- nrels = shdr->sh_size / shdr->sh_entsize;
-
- for (i = 0; i < nrels; i++) {
- GElf_Sym sym;
- GElf_Rel rel;
- unsigned int insn_idx;
- bool match = false;
- int j, map_idx;
-
- gelf_getrel(data, i, &rel);
-
- insn_idx = rel.r_offset / sizeof(struct bpf_insn);
-
- gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym);
-
- if (insn[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) {
- printf("invalid relo for insn[%d].code 0x%x\n",
- insn_idx, insn[insn_idx].code);
- return 1;
- }
- insn[insn_idx].src_reg = BPF_PSEUDO_MAP_FD;
-
- /* Match FD relocation against recorded map_data[] offset */
- for (map_idx = 0; map_idx < nr_maps; map_idx++) {
- if (maps[map_idx].elf_offset == sym.st_value) {
- match = true;
- break;
- }
- }
- if (match) {
- insn[insn_idx].imm = maps[map_idx].fd;
- } else {
- printf("invalid relo for insn[%d] no map_data match\n",
- insn_idx);
- return 1;
- }
- }
-
- return 0;
-}
-
-static int cmp_symbols(const void *l, const void *r)
-{
- const GElf_Sym *lsym = (const GElf_Sym *)l;
- const GElf_Sym *rsym = (const GElf_Sym *)r;
-
- if (lsym->st_value < rsym->st_value)
- return -1;
- else if (lsym->st_value > rsym->st_value)
- return 1;
- else
- return 0;
-}
-
-static int load_elf_maps_section(struct bpf_map_data *maps, int maps_shndx,
- Elf *elf, Elf_Data *symbols, int strtabidx)
-{
- int map_sz_elf, map_sz_copy;
- bool validate_zero = false;
- Elf_Data *data_maps;
- int i, nr_maps;
- GElf_Sym *sym;
- Elf_Scn *scn;
- int copy_sz;
-
- if (maps_shndx < 0)
- return -EINVAL;
- if (!symbols)
- return -EINVAL;
-
- /* Get data for maps section via elf index */
- scn = elf_getscn(elf, maps_shndx);
- if (scn)
- data_maps = elf_getdata(scn, NULL);
- if (!scn || !data_maps) {
- printf("Failed to get Elf_Data from maps section %d\n",
- maps_shndx);
- return -EINVAL;
- }
-
- /* For each map get corrosponding symbol table entry */
- sym = calloc(MAX_MAPS+1, sizeof(GElf_Sym));
- for (i = 0, nr_maps = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) {
- assert(nr_maps < MAX_MAPS+1);
- if (!gelf_getsym(symbols, i, &sym[nr_maps]))
- continue;
- if (sym[nr_maps].st_shndx != maps_shndx)
- continue;
- /* Only increment iif maps section */
- nr_maps++;
- }
-
- /* Align to map_fd[] order, via sort on offset in sym.st_value */
- qsort(sym, nr_maps, sizeof(GElf_Sym), cmp_symbols);
-
- /* Keeping compatible with ELF maps section changes
- * ------------------------------------------------
- * The program size of struct bpf_load_map_def is known by loader
- * code, but struct stored in ELF file can be different.
- *
- * Unfortunately sym[i].st_size is zero. To calculate the
- * struct size stored in the ELF file, assume all struct have
- * the same size, and simply divide with number of map
- * symbols.
- */
- map_sz_elf = data_maps->d_size / nr_maps;
- map_sz_copy = sizeof(struct bpf_load_map_def);
- if (map_sz_elf < map_sz_copy) {
- /*
- * Backward compat, loading older ELF file with
- * smaller struct, keeping remaining bytes zero.
- */
- map_sz_copy = map_sz_elf;
- } else if (map_sz_elf > map_sz_copy) {
- /*
- * Forward compat, loading newer ELF file with larger
- * struct with unknown features. Assume zero means
- * feature not used. Thus, validate rest of struct
- * data is zero.
- */
- validate_zero = true;
- }
-
- /* Memcpy relevant part of ELF maps data to loader maps */
- for (i = 0; i < nr_maps; i++) {
- struct bpf_load_map_def *def;
- unsigned char *addr, *end;
- const char *map_name;
- size_t offset;
-
- map_name = elf_strptr(elf, strtabidx, sym[i].st_name);
- maps[i].name = strdup(map_name);
- if (!maps[i].name) {
- printf("strdup(%s): %s(%d)\n", map_name,
- strerror(errno), errno);
- free(sym);
- return -errno;
- }
-
- /* Symbol value is offset into ELF maps section data area */
- offset = sym[i].st_value;
- def = (struct bpf_load_map_def *)(data_maps->d_buf + offset);
- maps[i].elf_offset = offset;
- memset(&maps[i].def, 0, sizeof(struct bpf_load_map_def));
- memcpy(&maps[i].def, def, map_sz_copy);
-
- /* Verify no newer features were requested */
- if (validate_zero) {
- addr = (unsigned char *) def + map_sz_copy;
- end = (unsigned char *) def + map_sz_elf;
- for (; addr < end; addr++) {
- if (*addr != 0) {
- free(sym);
- return -EFBIG;
- }
- }
- }
- }
-
- free(sym);
- return nr_maps;
-}
-
-static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
-{
- int fd, i, ret, maps_shndx = -1, strtabidx = -1;
- Elf *elf;
- GElf_Ehdr ehdr;
- GElf_Shdr shdr, shdr_prog;
- Elf_Data *data, *data_prog, *data_maps = NULL, *symbols = NULL;
- char *shname, *shname_prog;
- int nr_maps = 0;
-
- /* reset global variables */
- kern_version = 0;
- memset(license, 0, sizeof(license));
- memset(processed_sec, 0, sizeof(processed_sec));
-
- if (elf_version(EV_CURRENT) == EV_NONE)
- return 1;
-
- fd = open(path, O_RDONLY, 0);
- if (fd < 0)
- return 1;
-
- elf = elf_begin(fd, ELF_C_READ, NULL);
-
- if (!elf)
- return 1;
-
- if (gelf_getehdr(elf, &ehdr) != &ehdr)
- return 1;
-
- /* clear all kprobes */
- i = write_kprobe_events("");
-
- /* scan over all elf sections to get license and map info */
- for (i = 1; i < ehdr.e_shnum; i++) {
-
- if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
- continue;
-
- if (0) /* helpful for llvm debugging */
- printf("section %d:%s data %p size %zd link %d flags %d\n",
- i, shname, data->d_buf, data->d_size,
- shdr.sh_link, (int) shdr.sh_flags);
-
- if (strcmp(shname, "license") == 0) {
- processed_sec[i] = true;
- memcpy(license, data->d_buf, data->d_size);
- } else if (strcmp(shname, "version") == 0) {
- processed_sec[i] = true;
- if (data->d_size != sizeof(int)) {
- printf("invalid size of version section %zd\n",
- data->d_size);
- return 1;
- }
- memcpy(&kern_version, data->d_buf, sizeof(int));
- } else if (strcmp(shname, "maps") == 0) {
- int j;
-
- maps_shndx = i;
- data_maps = data;
- for (j = 0; j < MAX_MAPS; j++)
- map_data[j].fd = -1;
- } else if (shdr.sh_type == SHT_SYMTAB) {
- strtabidx = shdr.sh_link;
- symbols = data;
- }
- }
-
- ret = 1;
-
- if (!symbols) {
- printf("missing SHT_SYMTAB section\n");
- goto done;
- }
-
- if (data_maps) {
- nr_maps = load_elf_maps_section(map_data, maps_shndx,
- elf, symbols, strtabidx);
- if (nr_maps < 0) {
- printf("Error: Failed loading ELF maps (errno:%d):%s\n",
- nr_maps, strerror(-nr_maps));
- goto done;
- }
- if (load_maps(map_data, nr_maps, fixup_map))
- goto done;
- map_data_count = nr_maps;
-
- processed_sec[maps_shndx] = true;
- }
-
- /* process all relo sections, and rewrite bpf insns for maps */
- for (i = 1; i < ehdr.e_shnum; i++) {
- if (processed_sec[i])
- continue;
-
- if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
- continue;
-
- if (shdr.sh_type == SHT_REL) {
- struct bpf_insn *insns;
-
- /* locate prog sec that need map fixup (relocations) */
- if (get_sec(elf, shdr.sh_info, &ehdr, &shname_prog,
- &shdr_prog, &data_prog))
- continue;
-
- if (shdr_prog.sh_type != SHT_PROGBITS ||
- !(shdr_prog.sh_flags & SHF_EXECINSTR))
- continue;
-
- insns = (struct bpf_insn *) data_prog->d_buf;
- processed_sec[i] = true; /* relo section */
-
- if (parse_relo_and_apply(data, symbols, &shdr, insns,
- map_data, nr_maps))
- continue;
- }
- }
-
- /* load programs */
- for (i = 1; i < ehdr.e_shnum; i++) {
-
- if (processed_sec[i])
- continue;
-
- if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
- continue;
-
- if (memcmp(shname, "kprobe/", 7) == 0 ||
- memcmp(shname, "kretprobe/", 10) == 0 ||
- memcmp(shname, "tracepoint/", 11) == 0 ||
- memcmp(shname, "raw_tracepoint/", 15) == 0 ||
- memcmp(shname, "xdp", 3) == 0 ||
- memcmp(shname, "perf_event", 10) == 0 ||
- memcmp(shname, "socket", 6) == 0 ||
- memcmp(shname, "cgroup/", 7) == 0 ||
- memcmp(shname, "sockops", 7) == 0 ||
- memcmp(shname, "sk_skb", 6) == 0 ||
- memcmp(shname, "sk_msg", 6) == 0) {
- ret = load_and_attach(shname, data->d_buf,
- data->d_size);
- if (ret != 0)
- goto done;
- }
- }
-
-done:
- close(fd);
- return ret;
-}
-
-int load_bpf_file(char *path)
-{
- return do_load_bpf_file(path, NULL);
-}
-
-int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map)
-{
- return do_load_bpf_file(path, fixup_map);
-}
-
-void read_trace_pipe(void)
-{
- int trace_fd;
-
- trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0);
- if (trace_fd < 0)
- return;
-
- while (1) {
- static char buf[4096];
- ssize_t sz;
-
- sz = read(trace_fd, buf, sizeof(buf) - 1);
- if (sz > 0) {
- buf[sz] = 0;
- puts(buf);
- }
- }
-}
diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h
deleted file mode 100644
index 814894a12974..000000000000
--- a/samples/bpf/bpf_load.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __BPF_LOAD_H
-#define __BPF_LOAD_H
-
-#include <bpf/bpf.h>
-
-#define MAX_MAPS 32
-#define MAX_PROGS 32
-
-struct bpf_load_map_def {
- unsigned int type;
- unsigned int key_size;
- unsigned int value_size;
- unsigned int max_entries;
- unsigned int map_flags;
- unsigned int inner_map_idx;
- unsigned int numa_node;
-};
-
-struct bpf_map_data {
- int fd;
- char *name;
- size_t elf_offset;
- struct bpf_load_map_def def;
-};
-
-typedef void (*fixup_map_cb)(struct bpf_map_data *map, int idx);
-
-extern int prog_fd[MAX_PROGS];
-extern int event_fd[MAX_PROGS];
-extern char bpf_log_buf[BPF_LOG_BUF_SIZE];
-extern int prog_cnt;
-
-/* There is a one-to-one mapping between map_fd[] and map_data[].
- * The map_data[] just contains more rich info on the given map.
- */
-extern int map_fd[MAX_MAPS];
-extern struct bpf_map_data map_data[MAX_MAPS];
-extern int map_data_count;
-
-/* parses elf file compiled by llvm .c->.o
- * . parses 'maps' section and creates maps via BPF syscall
- * . parses 'license' section and passes it to syscall
- * . parses elf relocations for BPF maps and adjusts BPF_LD_IMM64 insns by
- * storing map_fd into insn->imm and marking such insns as BPF_PSEUDO_MAP_FD
- * . loads eBPF programs via BPF syscall
- *
- * One ELF file can contain multiple BPF programs which will be loaded
- * and their FDs stored stored in prog_fd array
- *
- * returns zero on success
- */
-int load_bpf_file(char *path);
-int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map);
-
-void read_trace_pipe(void);
-int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
-#endif
diff --git a/samples/bpf/cookie_uid_helper_example.c b/samples/bpf/cookie_uid_helper_example.c
index deb0e3e0324d..f0df3dda4b1f 100644
--- a/samples/bpf/cookie_uid_helper_example.c
+++ b/samples/bpf/cookie_uid_helper_example.c
@@ -67,8 +67,8 @@ static bool test_finish;
static void maps_create(void)
{
- map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(uint32_t),
- sizeof(struct stats), 100, 0);
+ map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(uint32_t),
+ sizeof(struct stats), 100, NULL);
if (map_fd < 0)
error(1, errno, "map create failed!\n");
}
@@ -147,19 +147,23 @@ static void prog_load(void)
*/
BPF_MOV64_REG(BPF_REG_9, BPF_REG_0),
BPF_MOV64_IMM(BPF_REG_1, 1),
- BPF_STX_XADD(BPF_DW, BPF_REG_9, BPF_REG_1,
- offsetof(struct stats, packets)),
+ BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_9, BPF_REG_1,
+ offsetof(struct stats, packets)),
BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
offsetof(struct __sk_buff, len)),
- BPF_STX_XADD(BPF_DW, BPF_REG_9, BPF_REG_1,
- offsetof(struct stats, bytes)),
+ BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_9, BPF_REG_1,
+ offsetof(struct stats, bytes)),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6,
offsetof(struct __sk_buff, len)),
BPF_EXIT_INSN(),
};
- prog_fd = bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog,
- ARRAY_SIZE(prog), "GPL", 0,
- log_buf, sizeof(log_buf));
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .log_buf = log_buf,
+ .log_size = sizeof(log_buf),
+ );
+
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL",
+ prog, ARRAY_SIZE(prog), &opts);
if (prog_fd < 0)
error(1, errno, "failed to load prog\n%s\n", log_buf);
}
@@ -167,7 +171,7 @@ static void prog_load(void)
static void prog_attach_iptables(char *file)
{
int ret;
- char rules[100];
+ char rules[256];
if (bpf_obj_pin(prog_fd, file))
error(1, errno, "bpf_obj_pin");
@@ -175,8 +179,13 @@ static void prog_attach_iptables(char *file)
printf("file path too long: %s\n", file);
exit(1);
}
- sprintf(rules, "iptables -A OUTPUT -m bpf --object-pinned %s -j ACCEPT",
- file);
+ ret = snprintf(rules, sizeof(rules),
+ "iptables -A OUTPUT -m bpf --object-pinned %s -j ACCEPT",
+ file);
+ if (ret < 0 || ret >= sizeof(rules)) {
+ printf("error constructing iptables command\n");
+ exit(1);
+ }
ret = system(rules);
if (ret < 0) {
printf("iptables rule update failed: %d/n", WEXITSTATUS(ret));
@@ -313,7 +322,7 @@ int main(int argc, char *argv[])
print_table();
printf("\n");
sleep(1);
- };
+ }
} else if (cfg_test_cookie) {
udp_client();
}
diff --git a/samples/bpf/cpustat_kern.c b/samples/bpf/cpustat_kern.c
index a86a19d5f033..5aefd19cdfa1 100644
--- a/samples/bpf/cpustat_kern.c
+++ b/samples/bpf/cpustat_kern.c
@@ -51,28 +51,28 @@ static int cpu_opps[] = { 208000, 432000, 729000, 960000, 1200000 };
#define MAP_OFF_PSTATE_IDX 3
#define MAP_OFF_NUM 4
-struct bpf_map_def SEC("maps") my_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u64),
- .max_entries = MAX_CPU * MAP_OFF_NUM,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, u64);
+ __uint(max_entries, MAX_CPU * MAP_OFF_NUM);
+} my_map SEC(".maps");
/* cstate_duration records duration time for every idle state per CPU */
-struct bpf_map_def SEC("maps") cstate_duration = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u64),
- .max_entries = MAX_CPU * MAX_CSTATE_ENTRIES,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, u64);
+ __uint(max_entries, MAX_CPU * MAX_CSTATE_ENTRIES);
+} cstate_duration SEC(".maps");
/* pstate_duration records duration time for every operating point per CPU */
-struct bpf_map_def SEC("maps") pstate_duration = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u64),
- .max_entries = MAX_CPU * MAX_PSTATE_ENTRIES,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, u64);
+ __uint(max_entries, MAX_CPU * MAX_PSTATE_ENTRIES);
+} pstate_duration SEC(".maps");
/*
* The trace events for cpu_idle and cpu_frequency are taken from:
diff --git a/samples/bpf/cpustat_user.c b/samples/bpf/cpustat_user.c
index 869a99406dbf..ab90bb08a2b4 100644
--- a/samples/bpf/cpustat_user.c
+++ b/samples/bpf/cpustat_user.c
@@ -9,16 +9,16 @@
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
-#include <linux/bpf.h>
#include <locale.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
-#include <sys/resource.h>
#include <sys/wait.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
+
+static int cstate_map_fd, pstate_map_fd;
#define MAX_CPU 8
#define MAX_PSTATE_ENTRIES 5
@@ -181,21 +181,50 @@ static void int_exit(int sig)
{
cpu_stat_inject_cpu_idle_event();
cpu_stat_inject_cpu_frequency_event();
- cpu_stat_update(map_fd[1], map_fd[2]);
+ cpu_stat_update(cstate_map_fd, pstate_map_fd);
cpu_stat_print();
exit(0);
}
int main(int argc, char **argv)
{
+ struct bpf_link *link = NULL;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
char filename[256];
int ret;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+ if (!prog) {
+ printf("finding a prog in obj file failed\n");
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ cstate_map_fd = bpf_object__find_map_fd_by_name(obj, "cstate_duration");
+ pstate_map_fd = bpf_object__find_map_fd_by_name(obj, "pstate_duration");
+ if (cstate_map_fd < 0 || pstate_map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ link = bpf_program__attach(prog);
+ if (libbpf_get_error(link)) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ link = NULL;
+ goto cleanup;
}
ret = cpu_stat_inject_cpu_idle_event();
@@ -210,10 +239,13 @@ int main(int argc, char **argv)
signal(SIGTERM, int_exit);
while (1) {
- cpu_stat_update(map_fd[1], map_fd[2]);
+ cpu_stat_update(cstate_map_fd, pstate_map_fd);
cpu_stat_print();
sleep(5);
}
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/do_hbm_test.sh b/samples/bpf/do_hbm_test.sh
index ffe4c0607341..38e4599350db 100755
--- a/samples/bpf/do_hbm_test.sh
+++ b/samples/bpf/do_hbm_test.sh
@@ -10,7 +10,7 @@
Usage() {
echo "Script for testing HBM (Host Bandwidth Manager) framework."
echo "It creates a cgroup to use for testing and load a BPF program to limit"
- echo "egress or ingress bandwidht. It then uses iperf3 or netperf to create"
+ echo "egress or ingress bandwidth. It then uses iperf3 or netperf to create"
echo "loads. The output is the goodput in Mbps (unless -D was used)."
echo ""
echo "USAGE: $name [out] [-b=<prog>|--bpf=<prog>] [-c=<cc>|--cc=<cc>]"
@@ -91,6 +91,16 @@ qdisc=""
flags=""
do_stats=0
+BPFFS=/sys/fs/bpf
+function config_bpffs () {
+ if mount | grep $BPFFS > /dev/null; then
+ echo "bpffs already mounted"
+ else
+ echo "bpffs not mounted. Mounting..."
+ mount -t bpf none $BPFFS
+ fi
+}
+
function start_hbm () {
rm -f hbm.out
echo "./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog" > hbm.out
@@ -192,6 +202,7 @@ processArgs () {
}
processArgs
+config_bpffs
if [ $debug_flag -eq 1 ] ; then
rm -f hbm_out.log
@@ -201,7 +212,7 @@ hbm_pid=$(start_hbm)
usleep 100000
host=`hostname`
-cg_base_dir=/sys/fs/cgroup
+cg_base_dir=/sys/fs/cgroup/unified
cg_dir="$cg_base_dir/cgroup-test-work-dir/hbm$id"
echo $$ >> $cg_dir/cgroup.procs
@@ -411,23 +422,8 @@ fi
sleep 1
-# Detach any BPF programs that may have lingered
-ttx=`bpftool cgroup tree | grep hbm`
-v=2
-for x in $ttx ; do
- if [ "${x:0:36}" == "/sys/fs/cgroup/cgroup-test-work-dir/" ] ; then
- cg=$x ; v=0
- else
- if [ $v -eq 0 ] ; then
- id=$x ; v=1
- else
- if [ $v -eq 1 ] ; then
- type=$x ; bpftool cgroup detach $cg $type id $id
- v=0
- fi
- fi
- fi
-done
+# Detach any pinned BPF programs that may have lingered
+rm -rf $BPFFS/hbm*
if [ $use_netperf -ne 0 ] ; then
if [ "$server" == "" ] ; then
diff --git a/samples/bpf/fds_example.c b/samples/bpf/fds_example.c
index d5992f787232..88a26f3ce201 100644
--- a/samples/bpf/fds_example.c
+++ b/samples/bpf/fds_example.c
@@ -17,6 +17,7 @@
#include <bpf/libbpf.h>
#include "bpf_insn.h"
#include "sock_example.h"
+#include "bpf_util.h"
#define BPF_F_PIN (1 << 0)
#define BPF_F_GET (1 << 1)
@@ -30,6 +31,8 @@
#define BPF_M_MAP 1
#define BPF_M_PROG 2
+char bpf_log_buf[BPF_LOG_BUF_SIZE];
+
static void usage(void)
{
printf("Usage: fds_example [...]\n");
@@ -44,31 +47,30 @@ static void usage(void)
printf(" -h Display this help.\n");
}
-static int bpf_map_create(void)
-{
- return bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(uint32_t),
- sizeof(uint32_t), 1024, 0);
-}
-
static int bpf_prog_create(const char *object)
{
static struct bpf_insn insns[] = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
};
- size_t insns_cnt = sizeof(insns) / sizeof(struct bpf_insn);
- char bpf_log_buf[BPF_LOG_BUF_SIZE];
+ size_t insns_cnt = ARRAY_SIZE(insns);
struct bpf_object *obj;
- int prog_fd;
+ int err;
if (object) {
- assert(!bpf_prog_load(object, BPF_PROG_TYPE_UNSPEC,
- &obj, &prog_fd));
- return prog_fd;
+ obj = bpf_object__open_file(object, NULL);
+ assert(!libbpf_get_error(obj));
+ err = bpf_object__load(obj);
+ assert(!err);
+ return bpf_program__fd(bpf_object__next_program(obj, NULL));
} else {
- return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER,
- insns, insns_cnt, "GPL", 0,
- bpf_log_buf, BPF_LOG_BUF_SIZE);
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .log_buf = bpf_log_buf,
+ .log_size = BPF_LOG_BUF_SIZE,
+ );
+
+ return bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL",
+ insns, insns_cnt, &opts);
}
}
@@ -78,7 +80,8 @@ static int bpf_do_map(const char *file, uint32_t flags, uint32_t key,
int fd, ret;
if (flags & BPF_F_PIN) {
- fd = bpf_map_create();
+ fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(uint32_t),
+ sizeof(uint32_t), 1024, NULL);
printf("bpf: map fd:%d (%s)\n", fd, strerror(errno));
assert(fd > 0);
diff --git a/samples/bpf/hbm.c b/samples/bpf/hbm.c
index 7d7153777678..516fbac28b71 100644
--- a/samples/bpf/hbm.c
+++ b/samples/bpf/hbm.c
@@ -34,23 +34,20 @@
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
-#include <sys/resource.h>
#include <sys/time.h>
#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
#include <linux/unistd.h>
+#include <linux/compiler.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>
#include <getopt.h>
-#include "bpf_load.h"
-#include "bpf_rlimit.h"
#include "cgroup_helpers.h"
#include "hbm.h"
#include "bpf_util.h"
-#include <bpf/bpf.h>
#include <bpf/libbpf.h>
bool outFlag = true;
@@ -70,9 +67,9 @@ static void do_error(char *msg, bool errno_flag);
#define DEBUGFS "/sys/kernel/debug/tracing/"
-struct bpf_object *obj;
-int bpfprog_fd;
-int cgroup_storage_fd;
+static struct bpf_program *bpf_prog;
+static struct bpf_object *obj;
+static int queue_stats_fd;
static void read_trace_pipe2(void)
{
@@ -121,56 +118,59 @@ static void do_error(char *msg, bool errno_flag)
static int prog_load(char *prog)
{
- struct bpf_prog_load_attr prog_load_attr = {
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .file = prog,
- .expected_attach_type = BPF_CGROUP_INET_EGRESS,
- };
- int map_fd;
- struct bpf_map *map;
+ struct bpf_program *pos;
+ const char *sec_name;
- int ret = 0;
-
- if (access(prog, O_RDONLY) < 0) {
- printf("Error accessing file %s: %s\n", prog, strerror(errno));
+ obj = bpf_object__open_file(prog, NULL);
+ if (libbpf_get_error(obj)) {
+ printf("ERROR: opening BPF object file failed\n");
return 1;
}
- if (bpf_prog_load_xattr(&prog_load_attr, &obj, &bpfprog_fd))
- ret = 1;
- if (!ret) {
- map = bpf_object__find_map_by_name(obj, "queue_stats");
- map_fd = bpf_map__fd(map);
- if (map_fd < 0) {
- printf("Map not found: %s\n", strerror(map_fd));
- ret = 1;
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ printf("ERROR: loading BPF object file failed\n");
+ goto err;
+ }
+
+ bpf_object__for_each_program(pos, obj) {
+ sec_name = bpf_program__section_name(pos);
+ if (sec_name && !strcmp(sec_name, "cgroup_skb/egress")) {
+ bpf_prog = pos;
+ break;
}
}
+ if (!bpf_prog) {
+ printf("ERROR: finding a prog in obj file failed\n");
+ goto err;
+ }
- if (ret) {
- printf("ERROR: bpf_prog_load_xattr failed for: %s\n", prog);
- printf(" Output from verifier:\n%s\n------\n", bpf_log_buf);
- ret = -1;
- } else {
- ret = map_fd;
+ queue_stats_fd = bpf_object__find_map_fd_by_name(obj, "queue_stats");
+ if (queue_stats_fd < 0) {
+ printf("ERROR: finding a map in obj file failed\n");
+ goto err;
}
- return ret;
+ return 0;
+
+err:
+ bpf_object__close(obj);
+ return 1;
}
static int run_bpf_prog(char *prog, int cg_id)
{
- int map_fd;
- int rc = 0;
+ struct hbm_queue_stats qstats = {0};
+ char cg_dir[100], cg_pin_path[100];
+ struct bpf_link *link = NULL;
int key = 0;
int cg1 = 0;
- int type = BPF_CGROUP_INET_EGRESS;
- char cg_dir[100];
- struct hbm_queue_stats qstats = {0};
+ int rc = 0;
sprintf(cg_dir, "/hbm%d", cg_id);
- map_fd = prog_load(prog);
- if (map_fd == -1)
- return 1;
+ rc = prog_load(prog);
+ if (rc != 0)
+ return rc;
if (setup_cgroup_environment()) {
printf("ERROR: setting cgroup environment\n");
@@ -190,16 +190,24 @@ static int run_bpf_prog(char *prog, int cg_id)
qstats.stats = stats_flag ? 1 : 0;
qstats.loopback = loopback_flag ? 1 : 0;
qstats.no_cn = no_cn_flag ? 1 : 0;
- if (bpf_map_update_elem(map_fd, &key, &qstats, BPF_ANY)) {
+ if (bpf_map_update_elem(queue_stats_fd, &key, &qstats, BPF_ANY)) {
printf("ERROR: Could not update map element\n");
goto err;
}
if (!outFlag)
- type = BPF_CGROUP_INET_INGRESS;
- if (bpf_prog_attach(bpfprog_fd, cg1, type, 0)) {
- printf("ERROR: bpf_prog_attach fails!\n");
- log_err("Attaching prog");
+ bpf_program__set_expected_attach_type(bpf_prog, BPF_CGROUP_INET_INGRESS);
+
+ link = bpf_program__attach_cgroup(bpf_prog, cg1);
+ if (libbpf_get_error(link)) {
+ fprintf(stderr, "ERROR: bpf_program__attach_cgroup failed\n");
+ goto err;
+ }
+
+ sprintf(cg_pin_path, "/sys/fs/bpf/hbm%d", cg_id);
+ rc = bpf_link__pin(link, cg_pin_path);
+ if (rc < 0) {
+ printf("ERROR: bpf_link__pin failed: %d\n", rc);
goto err;
}
@@ -213,7 +221,7 @@ static int run_bpf_prog(char *prog, int cg_id)
#define DELTA_RATE_CHECK 10000 /* in us */
#define RATE_THRESHOLD 9500000000 /* 9.5 Gbps */
- bpf_map_lookup_elem(map_fd, &key, &qstats);
+ bpf_map_lookup_elem(queue_stats_fd, &key, &qstats);
if (gettimeofday(&t0, NULL) < 0)
do_error("gettimeofday failed", true);
t_last = t0;
@@ -242,7 +250,7 @@ static int run_bpf_prog(char *prog, int cg_id)
fclose(fin);
printf(" new_eth_tx_bytes:%llu\n",
new_eth_tx_bytes);
- bpf_map_lookup_elem(map_fd, &key, &qstats);
+ bpf_map_lookup_elem(queue_stats_fd, &key, &qstats);
new_cg_tx_bytes = qstats.bytes_total;
delta_bytes = new_eth_tx_bytes - last_eth_tx_bytes;
last_eth_tx_bytes = new_eth_tx_bytes;
@@ -289,14 +297,14 @@ static int run_bpf_prog(char *prog, int cg_id)
rate = minRate;
qstats.rate = rate;
}
- if (bpf_map_update_elem(map_fd, &key, &qstats, BPF_ANY))
+ if (bpf_map_update_elem(queue_stats_fd, &key, &qstats, BPF_ANY))
do_error("update map element fails", false);
}
} else {
sleep(dur);
}
// Get stats!
- if (stats_flag && bpf_map_lookup_elem(map_fd, &key, &qstats)) {
+ if (stats_flag && bpf_map_lookup_elem(queue_stats_fd, &key, &qstats)) {
char fname[100];
FILE *fout;
@@ -394,14 +402,20 @@ static int run_bpf_prog(char *prog, int cg_id)
if (debugFlag)
read_trace_pipe2();
- return rc;
+ goto cleanup;
+
err:
rc = 1;
- if (cg1)
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
+
+ if (cg1 != -1)
close(cg1);
- cleanup_cgroup_environment();
+ if (rc != 0)
+ cleanup_cgroup_environment();
return rc;
}
@@ -483,7 +497,7 @@ int main(int argc, char **argv)
"Option -%c requires an argument.\n\n",
optopt);
case 'h':
- // fallthrough
+ __fallthrough;
default:
Usage();
return 0;
@@ -494,5 +508,8 @@ int main(int argc, char **argv)
prog = argv[optind];
printf("HBM prog: %s\n", prog != NULL ? prog : "NULL");
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
return run_bpf_prog(prog, cg_id);
}
diff --git a/samples/bpf/hbm_kern.h b/samples/bpf/hbm_kern.h
index e00f26f6afba..1752a46a2b05 100644
--- a/samples/bpf/hbm_kern.h
+++ b/samples/bpf/hbm_kern.h
@@ -9,8 +9,6 @@
* Include file for sample Host Bandwidth Manager (HBM) BPF programs
*/
#define KBUILD_MODNAME "foo"
-#include <stddef.h>
-#include <stdbool.h>
#include <uapi/linux/bpf.h>
#include <uapi/linux/if_ether.h>
#include <uapi/linux/if_packet.h>
@@ -69,7 +67,7 @@ struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, 1);
__type(key, u32);
- __type(value, struct hvm_queue_stats);
+ __type(value, struct hbm_queue_stats);
} queue_stats SEC(".maps");
struct hbm_pkt_info {
diff --git a/samples/bpf/ibumad_kern.c b/samples/bpf/ibumad_kern.c
index 3a91b4c1989a..9b193231024a 100644
--- a/samples/bpf/ibumad_kern.c
+++ b/samples/bpf/ibumad_kern.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
-/**
+/*
* ibumad BPF sample kernel side
*
* This program is free software; you can redistribute it and/or
@@ -16,19 +16,19 @@
#include <bpf/bpf_helpers.h>
-struct bpf_map_def SEC("maps") read_count = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32), /* class; u32 required */
- .value_size = sizeof(u64), /* count of mads read */
- .max_entries = 256, /* Room for all Classes */
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32); /* class; u32 required */
+ __type(value, u64); /* count of mads read */
+ __uint(max_entries, 256); /* Room for all Classes */
+} read_count SEC(".maps");
-struct bpf_map_def SEC("maps") write_count = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32), /* class; u32 required */
- .value_size = sizeof(u64), /* count of mads written */
- .max_entries = 256, /* Room for all Classes */
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32); /* class; u32 required */
+ __type(value, u64); /* count of mads written */
+ __uint(max_entries, 256); /* Room for all Classes */
+} write_count SEC(".maps");
#undef DEBUG
#ifndef DEBUG
diff --git a/samples/bpf/ibumad_user.c b/samples/bpf/ibumad_user.c
index fa06eef31a84..d074c978aac7 100644
--- a/samples/bpf/ibumad_user.c
+++ b/samples/bpf/ibumad_user.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
-/**
+/*
* ibumad BPF sample user side
*
* This program is free software; you can redistribute it and/or
@@ -19,14 +19,18 @@
#include <sys/types.h>
#include <limits.h>
-#include <sys/resource.h>
#include <getopt.h>
#include <net/if.h>
-#include "bpf_load.h"
+#include <bpf/bpf.h>
#include "bpf_util.h"
#include <bpf/libbpf.h>
+static struct bpf_link *tp_links[3];
+static struct bpf_object *obj;
+static int map_fd[2];
+static int tp_cnt;
+
static void dump_counts(int fd)
{
__u32 key;
@@ -53,6 +57,11 @@ static void dump_all_counts(void)
static void dump_exit(int sig)
{
dump_all_counts();
+ /* Detach tracepoints */
+ while (tp_cnt)
+ bpf_link__destroy(tp_links[--tp_cnt]);
+
+ bpf_object__close(obj);
exit(0);
}
@@ -73,19 +82,11 @@ static void usage(char *cmd)
int main(int argc, char **argv)
{
+ struct bpf_program *prog;
unsigned long delay = 5;
+ char filename[256];
int longindex = 0;
- int opt;
- char bpf_file[256];
-
- /* Create the eBPF kernel code path name.
- * This follows the pattern of all of the other bpf samples
- */
- snprintf(bpf_file, sizeof(bpf_file), "%s_kern.o", argv[0]);
-
- /* Do one final dump when exiting */
- signal(SIGINT, dump_exit);
- signal(SIGTERM, dump_exit);
+ int opt, err = -1;
while ((opt = getopt_long(argc, argv, "hd:rSw",
long_options, &longindex)) != -1) {
@@ -107,16 +108,51 @@ int main(int argc, char **argv)
}
}
- if (load_bpf_file(bpf_file)) {
- fprintf(stderr, "ERROR: failed to load eBPF from file : %s\n",
- bpf_file);
- return 1;
+ /* Do one final dump when exiting */
+ signal(SIGINT, dump_exit);
+ signal(SIGTERM, dump_exit);
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return err;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd[0] = bpf_object__find_map_fd_by_name(obj, "read_count");
+ map_fd[1] = bpf_object__find_map_fd_by_name(obj, "write_count");
+ if (map_fd[0] < 0 || map_fd[1] < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ tp_links[tp_cnt] = bpf_program__attach(prog);
+ if (libbpf_get_error(tp_links[tp_cnt])) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ tp_links[tp_cnt] = NULL;
+ goto cleanup;
+ }
+ tp_cnt++;
}
while (1) {
sleep(delay);
dump_all_counts();
}
+ err = 0;
+
+cleanup:
+ /* Detach tracepoints */
+ while (tp_cnt)
+ bpf_link__destroy(tp_links[--tp_cnt]);
- return 0;
+ bpf_object__close(obj);
+ return err;
}
diff --git a/samples/bpf/lathist_kern.c b/samples/bpf/lathist_kern.c
index ca9c2e4e69aa..4adfcbbe6ef4 100644
--- a/samples/bpf/lathist_kern.c
+++ b/samples/bpf/lathist_kern.c
@@ -18,12 +18,12 @@
* trace_preempt_[on|off] tracepoints hooks is not supported.
*/
-struct bpf_map_def SEC("maps") my_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(u64),
- .max_entries = MAX_CPU,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, u64);
+ __uint(max_entries, MAX_CPU);
+} my_map SEC(".maps");
SEC("kprobe/trace_preempt_off")
int bpf_prog1(struct pt_regs *ctx)
@@ -61,12 +61,12 @@ static unsigned int log2l(unsigned long v)
return log2(v);
}
-struct bpf_map_def SEC("maps") my_lat = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(long),
- .max_entries = MAX_CPU * MAX_ENTRIES,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, long);
+ __uint(max_entries, MAX_CPU * MAX_ENTRIES);
+} my_lat SEC(".maps");
SEC("kprobe/trace_preempt_on")
int bpf_prog2(struct pt_regs *ctx)
diff --git a/samples/bpf/lathist_user.c b/samples/bpf/lathist_user.c
index 2ff2839a52d5..7d8ff2418303 100644
--- a/samples/bpf/lathist_user.c
+++ b/samples/bpf/lathist_user.c
@@ -6,9 +6,8 @@
#include <unistd.h>
#include <stdlib.h>
#include <signal.h>
-#include <linux/bpf.h>
+#include <bpf/libbpf.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
#define MAX_ENTRIES 20
#define MAX_CPU 4
@@ -81,20 +80,51 @@ static void get_data(int fd)
int main(int argc, char **argv)
{
+ struct bpf_link *links[2];
+ struct bpf_program *prog;
+ struct bpf_object *obj;
char filename[256];
+ int map_fd, i = 0;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ map_fd = bpf_object__find_map_fd_by_name(obj, "my_lat");
+ if (map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ links[i] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[i])) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ links[i] = NULL;
+ goto cleanup;
+ }
+ i++;
}
while (1) {
- get_data(map_fd[1]);
+ get_data(map_fd);
print_hist();
sleep(5);
}
+cleanup:
+ for (i--; i >= 0; i--)
+ bpf_link__destroy(links[i]);
+
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/lwt_len_hist.sh b/samples/bpf/lwt_len_hist.sh
index 090b96eaf7f7..0eda9754f50b 100644..100755
--- a/samples/bpf/lwt_len_hist.sh
+++ b/samples/bpf/lwt_len_hist.sh
@@ -8,6 +8,8 @@ VETH1=tst_lwt1b
TRACE_ROOT=/sys/kernel/debug/tracing
function cleanup {
+ # To reset saved histogram, remove pinned map
+ rm /sys/fs/bpf/tc/globals/lwt_len_hist_map
ip route del 192.168.253.2/32 dev $VETH0 2> /dev/null
ip link del $VETH0 2> /dev/null
ip link del $VETH1 2> /dev/null
diff --git a/samples/bpf/lwt_len_hist_kern.c b/samples/bpf/lwt_len_hist_kern.c
index 9ed63e10e170..1fa14c54963a 100644
--- a/samples/bpf/lwt_len_hist_kern.c
+++ b/samples/bpf/lwt_len_hist_kern.c
@@ -16,13 +16,6 @@
#include <uapi/linux/in.h>
#include <bpf/bpf_helpers.h>
-# define printk(fmt, ...) \
- ({ \
- char ____fmt[] = fmt; \
- bpf_trace_printk(____fmt, sizeof(____fmt), \
- ##__VA_ARGS__); \
- })
-
struct bpf_elf_map {
__u32 type;
__u32 size_key;
diff --git a/samples/bpf/lwt_len_hist_user.c b/samples/bpf/lwt_len_hist_user.c
index 587b68b1f8dd..430a4b7e353e 100644
--- a/samples/bpf/lwt_len_hist_user.c
+++ b/samples/bpf/lwt_len_hist_user.c
@@ -15,8 +15,6 @@
#define MAX_INDEX 64
#define MAX_STARS 38
-char bpf_log_buf[BPF_LOG_BUF_SIZE];
-
static void stars(char *str, long val, long max, int width)
{
int i;
diff --git a/samples/bpf/map_perf_test_kern.c b/samples/bpf/map_perf_test_kern.c
index 12e91ae64d4d..7342c5b2f278 100644
--- a/samples/bpf/map_perf_test_kern.c
+++ b/samples/bpf/map_perf_test_kern.c
@@ -9,154 +9,172 @@
#include <linux/version.h>
#include <uapi/linux/bpf.h>
#include <bpf/bpf_helpers.h>
-#include "bpf_legacy.h"
#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "trace_common.h"
#define MAX_ENTRIES 1000
#define MAX_NR_CPUS 1024
-struct bpf_map_def_legacy SEC("maps") hash_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(long),
- .max_entries = MAX_ENTRIES,
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, long);
+ __uint(max_entries, MAX_ENTRIES);
+} hash_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __type(key, u32);
+ __type(value, long);
+ __uint(max_entries, 10000);
+} lru_hash_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __type(key, u32);
+ __type(value, long);
+ __uint(max_entries, 10000);
+ __uint(map_flags, BPF_F_NO_COMMON_LRU);
+} nocommon_lru_hash_map SEC(".maps");
+
+struct inner_lru {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __type(key, u32);
+ __type(value, long);
+ __uint(max_entries, MAX_ENTRIES);
+ __uint(map_flags, BPF_F_NUMA_NODE);
+ __uint(numa_node, 0);
+} inner_lru_hash_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, MAX_NR_CPUS);
+ __uint(key_size, sizeof(u32));
+ __array(values, struct inner_lru); /* use inner_lru as inner map */
+} array_of_lru_hashs SEC(".maps") = {
+ /* statically initialize the first element */
+ .values = { &inner_lru_hash_map },
};
-struct bpf_map_def_legacy SEC("maps") lru_hash_map = {
- .type = BPF_MAP_TYPE_LRU_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(long),
- .max_entries = 10000,
-};
-
-struct bpf_map_def_legacy SEC("maps") nocommon_lru_hash_map = {
- .type = BPF_MAP_TYPE_LRU_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(long),
- .max_entries = 10000,
- .map_flags = BPF_F_NO_COMMON_LRU,
-};
-
-struct bpf_map_def_legacy SEC("maps") inner_lru_hash_map = {
- .type = BPF_MAP_TYPE_LRU_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(long),
- .max_entries = MAX_ENTRIES,
- .map_flags = BPF_F_NUMA_NODE,
- .numa_node = 0,
-};
-
-struct bpf_map_def_legacy SEC("maps") array_of_lru_hashs = {
- .type = BPF_MAP_TYPE_ARRAY_OF_MAPS,
- .key_size = sizeof(u32),
- .max_entries = MAX_NR_CPUS,
-};
-
-struct bpf_map_def_legacy SEC("maps") percpu_hash_map = {
- .type = BPF_MAP_TYPE_PERCPU_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(long),
- .max_entries = MAX_ENTRIES,
-};
-
-struct bpf_map_def_legacy SEC("maps") hash_map_alloc = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(long),
- .max_entries = MAX_ENTRIES,
- .map_flags = BPF_F_NO_PREALLOC,
-};
-
-struct bpf_map_def_legacy SEC("maps") percpu_hash_map_alloc = {
- .type = BPF_MAP_TYPE_PERCPU_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(long),
- .max_entries = MAX_ENTRIES,
- .map_flags = BPF_F_NO_PREALLOC,
-};
-
-struct bpf_map_def_legacy SEC("maps") lpm_trie_map_alloc = {
- .type = BPF_MAP_TYPE_LPM_TRIE,
- .key_size = 8,
- .value_size = sizeof(long),
- .max_entries = 10000,
- .map_flags = BPF_F_NO_PREALLOC,
-};
-
-struct bpf_map_def_legacy SEC("maps") array_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(long),
- .max_entries = MAX_ENTRIES,
-};
-
-struct bpf_map_def_legacy SEC("maps") lru_hash_lookup_map = {
- .type = BPF_MAP_TYPE_LRU_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(long),
- .max_entries = MAX_ENTRIES,
-};
-
-SEC("kprobe/sys_getuid")
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(long));
+ __uint(max_entries, MAX_ENTRIES);
+} percpu_hash_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, long);
+ __uint(max_entries, MAX_ENTRIES);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} hash_map_alloc SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(long));
+ __uint(max_entries, MAX_ENTRIES);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} percpu_hash_map_alloc SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LPM_TRIE);
+ __uint(key_size, 8);
+ __uint(value_size, sizeof(long));
+ __uint(max_entries, 10000);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} lpm_trie_map_alloc SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, long);
+ __uint(max_entries, MAX_ENTRIES);
+} array_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __type(key, u32);
+ __type(value, long);
+ __uint(max_entries, MAX_ENTRIES);
+} lru_hash_lookup_map SEC(".maps");
+
+SEC("kprobe/" SYSCALL(sys_getuid))
int stress_hmap(struct pt_regs *ctx)
{
u32 key = bpf_get_current_pid_tgid();
long init_val = 1;
long *value;
+ int i;
- bpf_map_update_elem(&hash_map, &key, &init_val, BPF_ANY);
- value = bpf_map_lookup_elem(&hash_map, &key);
- if (value)
- bpf_map_delete_elem(&hash_map, &key);
+ for (i = 0; i < 10; i++) {
+ bpf_map_update_elem(&hash_map, &key, &init_val, BPF_ANY);
+ value = bpf_map_lookup_elem(&hash_map, &key);
+ if (value)
+ bpf_map_delete_elem(&hash_map, &key);
+ }
return 0;
}
-SEC("kprobe/sys_geteuid")
+SEC("kprobe/" SYSCALL(sys_geteuid))
int stress_percpu_hmap(struct pt_regs *ctx)
{
u32 key = bpf_get_current_pid_tgid();
long init_val = 1;
long *value;
+ int i;
- bpf_map_update_elem(&percpu_hash_map, &key, &init_val, BPF_ANY);
- value = bpf_map_lookup_elem(&percpu_hash_map, &key);
- if (value)
- bpf_map_delete_elem(&percpu_hash_map, &key);
+ for (i = 0; i < 10; i++) {
+ bpf_map_update_elem(&percpu_hash_map, &key, &init_val, BPF_ANY);
+ value = bpf_map_lookup_elem(&percpu_hash_map, &key);
+ if (value)
+ bpf_map_delete_elem(&percpu_hash_map, &key);
+ }
return 0;
}
-SEC("kprobe/sys_getgid")
+SEC("kprobe/" SYSCALL(sys_getgid))
int stress_hmap_alloc(struct pt_regs *ctx)
{
u32 key = bpf_get_current_pid_tgid();
long init_val = 1;
long *value;
+ int i;
- bpf_map_update_elem(&hash_map_alloc, &key, &init_val, BPF_ANY);
- value = bpf_map_lookup_elem(&hash_map_alloc, &key);
- if (value)
- bpf_map_delete_elem(&hash_map_alloc, &key);
+ for (i = 0; i < 10; i++) {
+ bpf_map_update_elem(&hash_map_alloc, &key, &init_val, BPF_ANY);
+ value = bpf_map_lookup_elem(&hash_map_alloc, &key);
+ if (value)
+ bpf_map_delete_elem(&hash_map_alloc, &key);
+ }
return 0;
}
-SEC("kprobe/sys_getegid")
+SEC("kprobe/" SYSCALL(sys_getegid))
int stress_percpu_hmap_alloc(struct pt_regs *ctx)
{
u32 key = bpf_get_current_pid_tgid();
long init_val = 1;
long *value;
+ int i;
- bpf_map_update_elem(&percpu_hash_map_alloc, &key, &init_val, BPF_ANY);
- value = bpf_map_lookup_elem(&percpu_hash_map_alloc, &key);
- if (value)
- bpf_map_delete_elem(&percpu_hash_map_alloc, &key);
+ for (i = 0; i < 10; i++) {
+ bpf_map_update_elem(&percpu_hash_map_alloc, &key, &init_val, BPF_ANY);
+ value = bpf_map_lookup_elem(&percpu_hash_map_alloc, &key);
+ if (value)
+ bpf_map_delete_elem(&percpu_hash_map_alloc, &key);
+ }
return 0;
}
-SEC("kprobe/sys_connect")
+SEC("kprobe/" SYSCALL(sys_connect))
int stress_lru_hmap_alloc(struct pt_regs *ctx)
{
+ struct pt_regs *real_regs = (struct pt_regs *)PT_REGS_PARM1_CORE(ctx);
char fmt[] = "Failed at stress_lru_hmap_alloc. ret:%dn";
union {
u16 dst6[8];
@@ -175,8 +193,8 @@ int stress_lru_hmap_alloc(struct pt_regs *ctx)
long val = 1;
u32 key = 0;
- in6 = (struct sockaddr_in6 *)PT_REGS_PARM2(ctx);
- addrlen = (int)PT_REGS_PARM3(ctx);
+ in6 = (struct sockaddr_in6 *)PT_REGS_PARM2_CORE(real_regs);
+ addrlen = (int)PT_REGS_PARM3_CORE(real_regs);
if (addrlen != sizeof(*in6))
return 0;
@@ -233,7 +251,7 @@ done:
return 0;
}
-SEC("kprobe/sys_gettid")
+SEC("kprobe/" SYSCALL(sys_gettid))
int stress_lpm_trie_map_alloc(struct pt_regs *ctx)
{
union {
@@ -255,7 +273,7 @@ int stress_lpm_trie_map_alloc(struct pt_regs *ctx)
return 0;
}
-SEC("kprobe/sys_getpgid")
+SEC("kprobe/" SYSCALL(sys_getpgid))
int stress_hash_map_lookup(struct pt_regs *ctx)
{
u32 key = 1, i;
@@ -268,7 +286,7 @@ int stress_hash_map_lookup(struct pt_regs *ctx)
return 0;
}
-SEC("kprobe/sys_getppid")
+SEC("kprobe/" SYSCALL(sys_getppid))
int stress_array_map_lookup(struct pt_regs *ctx)
{
u32 key = 1, i;
diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c
index fe5564bff39b..1bb53f4b29e1 100644
--- a/samples/bpf/map_perf_test_user.c
+++ b/samples/bpf/map_perf_test_user.c
@@ -11,15 +11,13 @@
#include <sys/wait.h>
#include <stdlib.h>
#include <signal.h>
-#include <linux/bpf.h>
#include <string.h>
#include <time.h>
-#include <sys/resource.h>
#include <arpa/inet.h>
#include <errno.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
#define TEST_BIT(t) (1U << (t))
#define MAX_NR_CPUS 1024
@@ -61,14 +59,20 @@ const char *test_map_names[NR_TESTS] = {
[LRU_HASH_LOOKUP] = "lru_hash_lookup_map",
};
+enum map_idx {
+ array_of_lru_hashs_idx,
+ hash_map_alloc_idx,
+ lru_hash_lookup_idx,
+ NR_IDXES,
+};
+
+static int map_fd[NR_IDXES];
+
static int test_flags = ~0;
static uint32_t num_map_entries;
static uint32_t inner_lru_hash_size;
-static int inner_lru_hash_idx = -1;
-static int array_of_lru_hashs_idx = -1;
-static int lru_hash_lookup_idx = -1;
static int lru_hash_lookup_test_entries = 32;
-static uint32_t max_cnt = 1000000;
+static uint32_t max_cnt = 10000;
static int check_test_flags(enum test_type t)
{
@@ -122,30 +126,33 @@ static void do_test_lru(enum test_type test, int cpu)
__u64 start_time;
int i, ret;
- if (test == INNER_LRU_HASH_PREALLOC) {
+ if (test == INNER_LRU_HASH_PREALLOC && cpu) {
+ /* If CPU is not 0, create inner_lru hash map and insert the fd
+ * value into the array_of_lru_hash map. In case of CPU 0,
+ * 'inner_lru_hash_map' was statically inserted on the map init
+ */
int outer_fd = map_fd[array_of_lru_hashs_idx];
unsigned int mycpu, mynode;
+ LIBBPF_OPTS(bpf_map_create_opts, opts,
+ .map_flags = BPF_F_NUMA_NODE,
+ );
assert(cpu < MAX_NR_CPUS);
- if (cpu) {
- ret = syscall(__NR_getcpu, &mycpu, &mynode, NULL);
- assert(!ret);
-
- inner_lru_map_fds[cpu] =
- bpf_create_map_node(BPF_MAP_TYPE_LRU_HASH,
- test_map_names[INNER_LRU_HASH_PREALLOC],
- sizeof(uint32_t),
- sizeof(long),
- inner_lru_hash_size, 0,
- mynode);
- if (inner_lru_map_fds[cpu] == -1) {
- printf("cannot create BPF_MAP_TYPE_LRU_HASH %s(%d)\n",
- strerror(errno), errno);
- exit(1);
- }
- } else {
- inner_lru_map_fds[cpu] = map_fd[inner_lru_hash_idx];
+ ret = syscall(__NR_getcpu, &mycpu, &mynode, NULL);
+ assert(!ret);
+
+ opts.numa_node = mynode;
+ inner_lru_map_fds[cpu] =
+ bpf_map_create(BPF_MAP_TYPE_LRU_HASH,
+ test_map_names[INNER_LRU_HASH_PREALLOC],
+ sizeof(uint32_t),
+ sizeof(long),
+ inner_lru_hash_size, &opts);
+ if (inner_lru_map_fds[cpu] == -1) {
+ printf("cannot create BPF_MAP_TYPE_LRU_HASH %s(%d)\n",
+ strerror(errno), errno);
+ exit(1);
}
ret = bpf_map_update_elem(outer_fd, &cpu,
@@ -377,7 +384,8 @@ static void fill_lpm_trie(void)
key->data[1] = rand() & 0xff;
key->data[2] = rand() & 0xff;
key->data[3] = rand() & 0xff;
- r = bpf_map_update_elem(map_fd[6], key, &value, 0);
+ r = bpf_map_update_elem(map_fd[hash_map_alloc_idx],
+ key, &value, 0);
assert(!r);
}
@@ -388,59 +396,46 @@ static void fill_lpm_trie(void)
key->data[3] = 1;
value = 128;
- r = bpf_map_update_elem(map_fd[6], key, &value, 0);
+ r = bpf_map_update_elem(map_fd[hash_map_alloc_idx], key, &value, 0);
assert(!r);
}
-static void fixup_map(struct bpf_map_data *map, int idx)
+static void fixup_map(struct bpf_object *obj)
{
+ struct bpf_map *map;
int i;
- if (!strcmp("inner_lru_hash_map", map->name)) {
- inner_lru_hash_idx = idx;
- inner_lru_hash_size = map->def.max_entries;
- }
+ bpf_object__for_each_map(map, obj) {
+ const char *name = bpf_map__name(map);
- if (!strcmp("array_of_lru_hashs", map->name)) {
- if (inner_lru_hash_idx == -1) {
- printf("inner_lru_hash_map must be defined before array_of_lru_hashs\n");
- exit(1);
+ /* Only change the max_entries for the enabled test(s) */
+ for (i = 0; i < NR_TESTS; i++) {
+ if (!strcmp(test_map_names[i], name) &&
+ (check_test_flags(i))) {
+ bpf_map__set_max_entries(map, num_map_entries);
+ continue;
+ }
}
- map->def.inner_map_idx = inner_lru_hash_idx;
- array_of_lru_hashs_idx = idx;
}
- if (!strcmp("lru_hash_lookup_map", map->name))
- lru_hash_lookup_idx = idx;
-
- if (num_map_entries <= 0)
- return;
-
inner_lru_hash_size = num_map_entries;
-
- /* Only change the max_entries for the enabled test(s) */
- for (i = 0; i < NR_TESTS; i++) {
- if (!strcmp(test_map_names[i], map->name) &&
- (check_test_flags(i))) {
- map->def.max_entries = num_map_entries;
- }
- }
}
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ int nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+ struct bpf_link *links[8];
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ struct bpf_map *map;
char filename[256];
- int num_cpu = 8;
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- setrlimit(RLIMIT_MEMLOCK, &r);
+ int i = 0;
if (argc > 1)
test_flags = atoi(argv[1]) ? : test_flags;
if (argc > 2)
- num_cpu = atoi(argv[2]) ? : num_cpu;
+ nr_cpus = atoi(argv[2]) ? : nr_cpus;
if (argc > 3)
num_map_entries = atoi(argv[3]);
@@ -448,14 +443,61 @@ int main(int argc, char **argv)
if (argc > 4)
max_cnt = atoi(argv[4]);
- if (load_bpf_file_fixup_map(filename, fixup_map)) {
- printf("%s", bpf_log_buf);
- return 1;
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
+
+ map = bpf_object__find_map_by_name(obj, "inner_lru_hash_map");
+ if (libbpf_get_error(map)) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ inner_lru_hash_size = bpf_map__max_entries(map);
+ if (!inner_lru_hash_size) {
+ fprintf(stderr, "ERROR: failed to get map attribute\n");
+ goto cleanup;
+ }
+
+ /* resize BPF map prior to loading */
+ if (num_map_entries > 0)
+ fixup_map(obj);
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd[0] = bpf_object__find_map_fd_by_name(obj, "array_of_lru_hashs");
+ map_fd[1] = bpf_object__find_map_fd_by_name(obj, "hash_map_alloc");
+ map_fd[2] = bpf_object__find_map_fd_by_name(obj, "lru_hash_lookup_map");
+ if (map_fd[0] < 0 || map_fd[1] < 0 || map_fd[2] < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ links[i] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[i])) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ links[i] = NULL;
+ goto cleanup;
+ }
+ i++;
}
fill_lpm_trie();
- run_perf_test(num_cpu);
+ run_perf_test(nr_cpus);
+
+cleanup:
+ for (i--; i >= 0; i--)
+ bpf_link__destroy(links[i]);
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/offwaketime_kern.c b/samples/bpf/offwaketime_kern.c
index c4ec10dbfc3b..eb4d94742e6b 100644
--- a/samples/bpf/offwaketime_kern.c
+++ b/samples/bpf/offwaketime_kern.c
@@ -5,16 +5,22 @@
* License as published by the Free Software Foundation.
*/
#include <uapi/linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
#include <uapi/linux/ptrace.h>
#include <uapi/linux/perf_event.h>
#include <linux/version.h>
#include <linux/sched.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
-#define _(P) ({typeof(P) val; bpf_probe_read(&val, sizeof(val), &P); val;})
+#define _(P) \
+ ({ \
+ typeof(P) val; \
+ bpf_probe_read_kernel(&val, sizeof(val), &(P)); \
+ val; \
+ })
#define MINBLOCK_US 1
+#define MAX_ENTRIES 10000
struct key_t {
char waker[TASK_COMM_LEN];
@@ -23,38 +29,38 @@ struct key_t {
u32 tret;
};
-struct bpf_map_def SEC("maps") counts = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(struct key_t),
- .value_size = sizeof(u64),
- .max_entries = 10000,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, struct key_t);
+ __type(value, u64);
+ __uint(max_entries, MAX_ENTRIES);
+} counts SEC(".maps");
-struct bpf_map_def SEC("maps") start = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(u64),
- .max_entries = 10000,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, u64);
+ __uint(max_entries, MAX_ENTRIES);
+} start SEC(".maps");
struct wokeby_t {
char name[TASK_COMM_LEN];
u32 ret;
};
-struct bpf_map_def SEC("maps") wokeby = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(struct wokeby_t),
- .max_entries = 10000,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, struct wokeby_t);
+ __uint(max_entries, MAX_ENTRIES);
+} wokeby SEC(".maps");
-struct bpf_map_def SEC("maps") stackmap = {
- .type = BPF_MAP_TYPE_STACK_TRACE,
- .key_size = sizeof(u32),
- .value_size = PERF_MAX_STACK_DEPTH * sizeof(u64),
- .max_entries = 10000,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK_TRACE);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64));
+ __uint(max_entries, MAX_ENTRIES);
+} stackmap SEC(".maps");
#define STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP)
@@ -107,11 +113,11 @@ static inline int update_counts(void *ctx, u32 pid, u64 delta)
/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
struct sched_switch_args {
unsigned long long pad;
- char prev_comm[16];
+ char prev_comm[TASK_COMM_LEN];
int prev_pid;
int prev_prio;
long long prev_state;
- char next_comm[16];
+ char next_comm[TASK_COMM_LEN];
int next_pid;
int next_prio;
};
diff --git a/samples/bpf/offwaketime_user.c b/samples/bpf/offwaketime_user.c
index 51c7da5341cc..b6eedcb98fb9 100644
--- a/samples/bpf/offwaketime_user.c
+++ b/samples/bpf/offwaketime_user.c
@@ -5,19 +5,18 @@
#include <unistd.h>
#include <stdlib.h>
#include <signal.h>
-#include <linux/bpf.h>
-#include <string.h>
#include <linux/perf_event.h>
#include <errno.h>
-#include <assert.h>
#include <stdbool.h>
-#include <sys/resource.h>
#include <bpf/libbpf.h>
-#include "bpf_load.h"
+#include <bpf/bpf.h>
#include "trace_helpers.h"
#define PRINT_RAW_ADDR 0
+/* counts, stackmap */
+static int map_fd[2];
+
static void print_ksym(__u64 addr)
{
struct ksym *sym;
@@ -52,14 +51,14 @@ static void print_stack(struct key_t *key, __u64 count)
int i;
printf("%s;", key->target);
- if (bpf_map_lookup_elem(map_fd[3], &key->tret, ip) != 0) {
+ if (bpf_map_lookup_elem(map_fd[1], &key->tret, ip) != 0) {
printf("---;");
} else {
for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--)
print_ksym(ip[i]);
}
printf("-;");
- if (bpf_map_lookup_elem(map_fd[3], &key->wret, ip) != 0) {
+ if (bpf_map_lookup_elem(map_fd[1], &key->wret, ip) != 0) {
printf("---;");
} else {
for (i = 0; i < PERF_MAX_STACK_DEPTH; i++)
@@ -95,24 +94,49 @@ static void int_exit(int sig)
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ struct bpf_object *obj = NULL;
+ struct bpf_link *links[2];
+ struct bpf_program *prog;
+ int delay = 1, i = 0;
char filename[256];
- int delay = 1;
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- setrlimit(RLIMIT_MEMLOCK, &r);
-
- signal(SIGINT, int_exit);
- signal(SIGTERM, int_exit);
if (load_kallsyms()) {
printf("failed to process /proc/kallsyms\n");
return 2;
}
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ obj = NULL;
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd[0] = bpf_object__find_map_fd_by_name(obj, "counts");
+ map_fd[1] = bpf_object__find_map_fd_by_name(obj, "stackmap");
+ if (map_fd[0] < 0 || map_fd[1] < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ signal(SIGINT, int_exit);
+ signal(SIGTERM, int_exit);
+
+ bpf_object__for_each_program(prog, obj) {
+ links[i] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[i])) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ links[i] = NULL;
+ goto cleanup;
+ }
+ i++;
}
if (argc > 1)
@@ -120,5 +144,10 @@ int main(int argc, char **argv)
sleep(delay);
print_stacks(map_fd[0]);
+cleanup:
+ for (i--; i >= 0; i--)
+ bpf_link__destroy(links[i]);
+
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/sampleip_kern.c b/samples/bpf/sampleip_kern.c
index e504dc308371..a3f8a3998e0a 100644
--- a/samples/bpf/sampleip_kern.c
+++ b/samples/bpf/sampleip_kern.c
@@ -4,7 +4,6 @@
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
-#include <linux/version.h>
#include <linux/ptrace.h>
#include <uapi/linux/bpf.h>
#include <uapi/linux/bpf_perf_event.h>
@@ -13,12 +12,12 @@
#define MAX_IPS 8192
-struct bpf_map_def SEC("maps") ip_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(u64),
- .value_size = sizeof(u32),
- .max_entries = MAX_IPS,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u64);
+ __type(value, u32);
+ __uint(max_entries, MAX_IPS);
+} ip_map SEC(".maps");
SEC("perf_event")
int do_sample(struct bpf_perf_event_data *ctx)
diff --git a/samples/bpf/sampleip_user.c b/samples/bpf/sampleip_user.c
index b0f115f938bc..921c505bb567 100644
--- a/samples/bpf/sampleip_user.c
+++ b/samples/bpf/sampleip_user.c
@@ -10,13 +10,11 @@
#include <errno.h>
#include <signal.h>
#include <string.h>
-#include <assert.h>
#include <linux/perf_event.h>
#include <linux/ptrace.h>
#include <linux/bpf.h>
-#include <sys/ioctl.h>
+#include <bpf/bpf.h>
#include <bpf/libbpf.h>
-#include "bpf_load.h"
#include "perf-sys.h"
#include "trace_helpers.h"
@@ -25,6 +23,7 @@
#define MAX_IPS 8192
#define PAGE_OFFSET 0xffff880000000000
+static int map_fd;
static int nr_cpus;
static void usage(void)
@@ -34,9 +33,10 @@ static void usage(void)
printf(" duration # sampling duration (seconds), default 5\n");
}
-static int sampling_start(int *pmu_fd, int freq)
+static int sampling_start(int freq, struct bpf_program *prog,
+ struct bpf_link *links[])
{
- int i;
+ int i, pmu_fd;
struct perf_event_attr pe_sample_attr = {
.type = PERF_TYPE_SOFTWARE,
@@ -47,26 +47,30 @@ static int sampling_start(int *pmu_fd, int freq)
};
for (i = 0; i < nr_cpus; i++) {
- pmu_fd[i] = sys_perf_event_open(&pe_sample_attr, -1 /* pid */, i,
+ pmu_fd = sys_perf_event_open(&pe_sample_attr, -1 /* pid */, i,
-1 /* group_fd */, 0 /* flags */);
- if (pmu_fd[i] < 0) {
+ if (pmu_fd < 0) {
fprintf(stderr, "ERROR: Initializing perf sampling\n");
return 1;
}
- assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF,
- prog_fd[0]) == 0);
- assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0) == 0);
+ links[i] = bpf_program__attach_perf_event(prog, pmu_fd);
+ if (libbpf_get_error(links[i])) {
+ fprintf(stderr, "ERROR: Attach perf event\n");
+ links[i] = NULL;
+ close(pmu_fd);
+ return 1;
+ }
}
return 0;
}
-static void sampling_end(int *pmu_fd)
+static void sampling_end(struct bpf_link *links[])
{
int i;
for (i = 0; i < nr_cpus; i++)
- close(pmu_fd[i]);
+ bpf_link__destroy(links[i]);
}
struct ipcount {
@@ -128,14 +132,17 @@ static void print_ip_map(int fd)
static void int_exit(int sig)
{
printf("\n");
- print_ip_map(map_fd[0]);
+ print_ip_map(map_fd);
exit(0);
}
int main(int argc, char **argv)
{
+ int opt, freq = DEFAULT_FREQ, secs = DEFAULT_SECS, error = 1;
+ struct bpf_object *obj = NULL;
+ struct bpf_program *prog;
+ struct bpf_link **links;
char filename[256];
- int *pmu_fd, opt, freq = DEFAULT_FREQ, secs = DEFAULT_SECS;
/* process arguments */
while ((opt = getopt(argc, argv, "F:h")) != -1) {
@@ -163,38 +170,58 @@ int main(int argc, char **argv)
}
/* create perf FDs for each CPU */
- nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
- pmu_fd = malloc(nr_cpus * sizeof(int));
- if (pmu_fd == NULL) {
- fprintf(stderr, "ERROR: malloc of pmu_fd\n");
- return 1;
+ nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+ links = calloc(nr_cpus, sizeof(struct bpf_link *));
+ if (!links) {
+ fprintf(stderr, "ERROR: malloc of links\n");
+ goto cleanup;
}
- /* load BPF program */
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- if (load_bpf_file(filename)) {
- fprintf(stderr, "ERROR: loading BPF program (errno %d):\n",
- errno);
- if (strcmp(bpf_log_buf, "") == 0)
- fprintf(stderr, "Try: ulimit -l unlimited\n");
- else
- fprintf(stderr, "%s", bpf_log_buf);
- return 1;
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ obj = NULL;
+ goto cleanup;
+ }
+
+ prog = bpf_object__find_program_by_name(obj, "do_sample");
+ if (!prog) {
+ fprintf(stderr, "ERROR: finding a prog in obj file failed\n");
+ goto cleanup;
}
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd = bpf_object__find_map_fd_by_name(obj, "ip_map");
+ if (map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
signal(SIGINT, int_exit);
signal(SIGTERM, int_exit);
/* do sampling */
printf("Sampling at %d Hertz for %d seconds. Ctrl-C also ends.\n",
freq, secs);
- if (sampling_start(pmu_fd, freq) != 0)
- return 1;
+ if (sampling_start(freq, prog, links) != 0)
+ goto cleanup;
+
sleep(secs);
- sampling_end(pmu_fd);
- free(pmu_fd);
+ error = 0;
+cleanup:
+ sampling_end(links);
/* output sample counts */
- print_ip_map(map_fd[0]);
+ if (!error)
+ print_ip_map(map_fd);
- return 0;
+ free(links);
+ bpf_object__close(obj);
+ return error;
}
diff --git a/samples/bpf/sock_example.c b/samples/bpf/sock_example.c
index 00aae1d33fca..5b66f2401b96 100644
--- a/samples/bpf/sock_example.c
+++ b/samples/bpf/sock_example.c
@@ -29,6 +29,7 @@
#include <bpf/bpf.h>
#include "bpf_insn.h"
#include "sock_example.h"
+#include "bpf_util.h"
char bpf_log_buf[BPF_LOG_BUF_SIZE];
@@ -37,8 +38,8 @@ static int test_sock(void)
int sock = -1, map_fd, prog_fd, i, key;
long long value = 0, tcp_cnt, udp_cnt, icmp_cnt;
- map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value),
- 256, 0);
+ map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(key), sizeof(value),
+ 256, NULL);
if (map_fd < 0) {
printf("failed to create map '%s'\n", strerror(errno));
goto cleanup;
@@ -54,14 +55,18 @@ static int test_sock(void)
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
BPF_MOV64_IMM(BPF_REG_1, 1), /* r1 = 1 */
- BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+ BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_0, BPF_REG_1, 0),
BPF_MOV64_IMM(BPF_REG_0, 0), /* r0 = 0 */
BPF_EXIT_INSN(),
};
- size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
+ size_t insns_cnt = ARRAY_SIZE(prog);
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .log_buf = bpf_log_buf,
+ .log_size = BPF_LOG_BUF_SIZE,
+ );
- prog_fd = bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog, insns_cnt,
- "GPL", 0, bpf_log_buf, BPF_LOG_BUF_SIZE);
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL",
+ prog, insns_cnt, &opts);
if (prog_fd < 0) {
printf("failed to load prog '%s'\n", strerror(errno));
goto cleanup;
diff --git a/samples/bpf/sockex1_user.c b/samples/bpf/sockex1_user.c
index 3c83722877dc..9e8d39e245c1 100644
--- a/samples/bpf/sockex1_user.c
+++ b/samples/bpf/sockex1_user.c
@@ -11,17 +11,26 @@
int main(int ac, char **argv)
{
struct bpf_object *obj;
+ struct bpf_program *prog;
int map_fd, prog_fd;
char filename[256];
- int i, sock;
+ int i, sock, err;
FILE *f;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- if (bpf_prog_load(filename, BPF_PROG_TYPE_SOCKET_FILTER,
- &obj, &prog_fd))
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj))
return 1;
+ prog = bpf_object__next_program(obj, NULL);
+ bpf_program__set_type(prog, BPF_PROG_TYPE_SOCKET_FILTER);
+
+ err = bpf_object__load(obj);
+ if (err)
+ return 1;
+
+ prog_fd = bpf_program__fd(prog);
map_fd = bpf_object__find_map_fd_by_name(obj, "my_map");
sock = open_raw_sock("lo");
diff --git a/samples/bpf/sockex2_kern.c b/samples/bpf/sockex2_kern.c
index a41dd520bc53..b7997541f7ee 100644
--- a/samples/bpf/sockex2_kern.c
+++ b/samples/bpf/sockex2_kern.c
@@ -1,12 +1,12 @@
#include <uapi/linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-#include "bpf_legacy.h"
#include <uapi/linux/in.h>
#include <uapi/linux/if.h>
#include <uapi/linux/if_ether.h>
#include <uapi/linux/ip.h>
#include <uapi/linux/ipv6.h>
#include <uapi/linux/if_tunnel.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
#define IP_MF 0x2000
#define IP_OFFSET 0x1FFF
diff --git a/samples/bpf/sockex2_user.c b/samples/bpf/sockex2_user.c
index af925a5afd1d..2c18471336f0 100644
--- a/samples/bpf/sockex2_user.c
+++ b/samples/bpf/sockex2_user.c
@@ -7,7 +7,6 @@
#include "sock_example.h"
#include <unistd.h>
#include <arpa/inet.h>
-#include <sys/resource.h>
struct pair {
__u64 packets;
@@ -16,20 +15,26 @@ struct pair {
int main(int ac, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ struct bpf_program *prog;
struct bpf_object *obj;
int map_fd, prog_fd;
char filename[256];
- int i, sock;
+ int i, sock, err;
FILE *f;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- setrlimit(RLIMIT_MEMLOCK, &r);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj))
+ return 1;
+
+ prog = bpf_object__next_program(obj, NULL);
+ bpf_program__set_type(prog, BPF_PROG_TYPE_SOCKET_FILTER);
- if (bpf_prog_load(filename, BPF_PROG_TYPE_SOCKET_FILTER,
- &obj, &prog_fd))
+ err = bpf_object__load(obj);
+ if (err)
return 1;
+ prog_fd = bpf_program__fd(prog);
map_fd = bpf_object__find_map_fd_by_name(obj, "hash_map");
sock = open_raw_sock("lo");
diff --git a/samples/bpf/sockex3_kern.c b/samples/bpf/sockex3_kern.c
index 36d4dac23549..b363503357e5 100644
--- a/samples/bpf/sockex3_kern.c
+++ b/samples/bpf/sockex3_kern.c
@@ -5,8 +5,6 @@
* License as published by the Free Software Foundation.
*/
#include <uapi/linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-#include "bpf_legacy.h"
#include <uapi/linux/in.h>
#include <uapi/linux/if.h>
#include <uapi/linux/if_ether.h>
@@ -14,28 +12,32 @@
#include <uapi/linux/ipv6.h>
#include <uapi/linux/if_tunnel.h>
#include <uapi/linux/mpls.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
#define IP_MF 0x2000
#define IP_OFFSET 0x1FFF
#define PROG(F) SEC("socket/"__stringify(F)) int bpf_func_##F
-struct bpf_map_def SEC("maps") jmp_table = {
- .type = BPF_MAP_TYPE_PROG_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u32),
- .max_entries = 8,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(u32));
+ __uint(max_entries, 8);
+} jmp_table SEC(".maps");
#define PARSE_VLAN 1
#define PARSE_MPLS 2
#define PARSE_IP 3
#define PARSE_IPV6 4
-/* protocol dispatch routine.
- * It tail-calls next BPF program depending on eth proto
- * Note, we could have used:
- * bpf_tail_call(skb, &jmp_table, proto);
- * but it would need large prog_array
+/* Protocol dispatch routine. It tail-calls next BPF program depending
+ * on eth proto. Note, we could have used ...
+ *
+ * bpf_tail_call(skb, &jmp_table, proto);
+ *
+ * ... but it would need large prog_array and cannot be optimised given
+ * the map key is not static.
*/
static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto)
{
@@ -92,12 +94,12 @@ struct globals {
struct flow_key_record flow;
};
-struct bpf_map_def SEC("maps") percpu_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(struct globals),
- .max_entries = 32,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, struct globals);
+ __uint(max_entries, 32);
+} percpu_map SEC(".maps");
/* user poor man's per_cpu until native support is ready */
static struct globals *this_cpu_globals(void)
@@ -113,12 +115,12 @@ struct pair {
__u64 bytes;
};
-struct bpf_map_def SEC("maps") hash_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(struct flow_key_record),
- .value_size = sizeof(struct pair),
- .max_entries = 1024,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, struct flow_key_record);
+ __type(value, struct pair);
+ __uint(max_entries, 1024);
+} hash_map SEC(".maps");
static void update_stats(struct __sk_buff *skb, struct globals *g)
{
diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c
index bbb1cd0666a9..cd6fa79df900 100644
--- a/samples/bpf/sockex3_user.c
+++ b/samples/bpf/sockex3_user.c
@@ -1,17 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include <assert.h>
-#include <linux/bpf.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
#include "sock_example.h"
#include <unistd.h>
#include <arpa/inet.h>
-#include <sys/resource.h>
-
-#define PARSE_IP 3
-#define PARSE_IP_PROG_FD (prog_fd[0])
-#define PROG_ARRAY_FD (map_fd[0])
struct flow_key_record {
__be32 src;
@@ -30,31 +24,53 @@ struct pair {
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ int i, sock, key, fd, main_prog_fd, jmp_table_fd, hash_map_fd;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ const char *section;
char filename[256];
FILE *f;
- int i, sock, err, id, key = PARSE_IP;
- struct bpf_prog_info info = {};
- uint32_t info_len = sizeof(info);
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- setrlimit(RLIMIT_MEMLOCK, &r);
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ jmp_table_fd = bpf_object__find_map_fd_by_name(obj, "jmp_table");
+ hash_map_fd = bpf_object__find_map_fd_by_name(obj, "hash_map");
+ if (jmp_table_fd < 0 || hash_map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
}
- /* Test fd array lookup which returns the id of the bpf_prog */
- err = bpf_obj_get_info_by_fd(PARSE_IP_PROG_FD, &info, &info_len);
- assert(!err);
- err = bpf_map_lookup_elem(PROG_ARRAY_FD, &key, &id);
- assert(!err);
- assert(id == info.id);
+ bpf_object__for_each_program(prog, obj) {
+ fd = bpf_program__fd(prog);
+
+ section = bpf_program__section_name(prog);
+ if (sscanf(section, "socket/%d", &key) != 1) {
+ fprintf(stderr, "ERROR: finding prog failed\n");
+ goto cleanup;
+ }
+
+ if (key == 0)
+ main_prog_fd = fd;
+ else
+ bpf_map_update_elem(jmp_table_fd, &key, &fd, BPF_ANY);
+ }
sock = open_raw_sock("lo");
- assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd[4],
+ /* attach BPF program to socket */
+ assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &main_prog_fd,
sizeof(__u32)) == 0);
if (argc > 1)
@@ -69,8 +85,8 @@ int main(int argc, char **argv)
sleep(1);
printf("IP src.port -> dst.port bytes packets\n");
- while (bpf_map_get_next_key(map_fd[2], &key, &next_key) == 0) {
- bpf_map_lookup_elem(map_fd[2], &next_key, &value);
+ while (bpf_map_get_next_key(hash_map_fd, &key, &next_key) == 0) {
+ bpf_map_lookup_elem(hash_map_fd, &next_key, &value);
printf("%s.%05d -> %s.%05d %12lld %12lld\n",
inet_ntoa((struct in_addr){htonl(next_key.src)}),
next_key.port16[0],
@@ -80,5 +96,8 @@ int main(int argc, char **argv)
key = next_key;
}
}
+
+cleanup:
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/spintest_kern.c b/samples/bpf/spintest_kern.c
index f508af357251..455da77319d9 100644
--- a/samples/bpf/spintest_kern.c
+++ b/samples/bpf/spintest_kern.c
@@ -12,25 +12,25 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-struct bpf_map_def SEC("maps") my_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(long),
- .value_size = sizeof(long),
- .max_entries = 1024,
-};
-struct bpf_map_def SEC("maps") my_map2 = {
- .type = BPF_MAP_TYPE_PERCPU_HASH,
- .key_size = sizeof(long),
- .value_size = sizeof(long),
- .max_entries = 1024,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, long);
+ __type(value, long);
+ __uint(max_entries, 1024);
+} my_map SEC(".maps");
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(key_size, sizeof(long));
+ __uint(value_size, sizeof(long));
+ __uint(max_entries, 1024);
+} my_map2 SEC(".maps");
-struct bpf_map_def SEC("maps") stackmap = {
- .type = BPF_MAP_TYPE_STACK_TRACE,
- .key_size = sizeof(u32),
- .value_size = PERF_MAX_STACK_DEPTH * sizeof(u64),
- .max_entries = 10000,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK_TRACE);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64));
+ __uint(max_entries, 10000);
+} stackmap SEC(".maps");
#define PROG(foo) \
int foo(struct pt_regs *ctx) \
diff --git a/samples/bpf/spintest_user.c b/samples/bpf/spintest_user.c
index fb430ea2ef51..aadac14f748a 100644
--- a/samples/bpf/spintest_user.c
+++ b/samples/bpf/spintest_user.c
@@ -1,40 +1,70 @@
// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include <unistd.h>
-#include <linux/bpf.h>
#include <string.h>
#include <assert.h>
-#include <sys/resource.h>
#include <bpf/libbpf.h>
-#include "bpf_load.h"
+#include <bpf/bpf.h>
#include "trace_helpers.h"
int main(int ac, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ char filename[256], symbol[256];
+ struct bpf_object *obj = NULL;
+ struct bpf_link *links[20];
long key, next_key, value;
- char filename[256];
+ struct bpf_program *prog;
+ int map_fd, i, j = 0;
+ const char *section;
struct ksym *sym;
- int i;
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- setrlimit(RLIMIT_MEMLOCK, &r);
if (load_kallsyms()) {
printf("failed to process /proc/kallsyms\n");
return 2;
}
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ obj = NULL;
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd = bpf_object__find_map_fd_by_name(obj, "my_map");
+ if (map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ section = bpf_program__section_name(prog);
+ if (sscanf(section, "kprobe/%s", symbol) != 1)
+ continue;
+
+ /* Attach prog only when symbol exists */
+ if (ksym_get_addr(symbol)) {
+ links[j] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[j])) {
+ fprintf(stderr, "bpf_program__attach failed\n");
+ links[j] = NULL;
+ goto cleanup;
+ }
+ j++;
+ }
}
for (i = 0; i < 5; i++) {
key = 0;
printf("kprobing funcs:");
- while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0) {
- bpf_map_lookup_elem(map_fd[0], &next_key, &value);
+ while (bpf_map_get_next_key(map_fd, &key, &next_key) == 0) {
+ bpf_map_lookup_elem(map_fd, &next_key, &value);
assert(next_key == value);
sym = ksym_search(value);
key = next_key;
@@ -48,10 +78,15 @@ int main(int ac, char **argv)
if (key)
printf("\n");
key = 0;
- while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0)
- bpf_map_delete_elem(map_fd[0], &next_key);
+ while (bpf_map_get_next_key(map_fd, &key, &next_key) == 0)
+ bpf_map_delete_elem(map_fd, &next_key);
sleep(1);
}
+cleanup:
+ for (j--; j >= 0; j--)
+ bpf_link__destroy(links[j]);
+
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/syscall_tp_kern.c b/samples/bpf/syscall_tp_kern.c
index 5a62b03b1f88..50231c2eff9c 100644
--- a/samples/bpf/syscall_tp_kern.c
+++ b/samples/bpf/syscall_tp_kern.c
@@ -18,19 +18,19 @@ struct syscalls_exit_open_args {
long ret;
};
-struct bpf_map_def SEC("maps") enter_open_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u32),
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, u32);
+ __uint(max_entries, 1);
+} enter_open_map SEC(".maps");
-struct bpf_map_def SEC("maps") exit_open_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u32),
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, u32);
+ __uint(max_entries, 1);
+} exit_open_map SEC(".maps");
static __always_inline void count(void *map)
{
diff --git a/samples/bpf/syscall_tp_user.c b/samples/bpf/syscall_tp_user.c
index 57014bab7cbe..7a788bb837fc 100644
--- a/samples/bpf/syscall_tp_user.c
+++ b/samples/bpf/syscall_tp_user.c
@@ -5,16 +5,11 @@
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
-#include <signal.h>
-#include <linux/bpf.h>
#include <string.h>
#include <linux/perf_event.h>
#include <errno.h>
-#include <assert.h>
-#include <stdbool.h>
-#include <sys/resource.h>
+#include <bpf/libbpf.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
/* This program verifies bpf attachment to tracepoint sys_enter_* and sys_exit_*.
* This requires kernel CONFIG_FTRACE_SYSCALLS to be set.
@@ -40,6 +35,9 @@ static void verify_map(int map_id)
fprintf(stderr, "failed: map #%d returns value 0\n", map_id);
return;
}
+
+ printf("verify map:%d val: %d\n", map_id, val);
+
val = 0;
if (bpf_map_update_elem(map_id, &key, &val, BPF_ANY) != 0) {
fprintf(stderr, "map_update failed: %s\n", strerror(errno));
@@ -49,16 +47,44 @@ static void verify_map(int map_id)
static int test(char *filename, int num_progs)
{
- int i, fd, map0_fds[num_progs], map1_fds[num_progs];
+ int map0_fds[num_progs], map1_fds[num_progs], fd, i, j = 0;
+ struct bpf_link *links[num_progs * 4];
+ struct bpf_object *objs[num_progs];
+ struct bpf_program *prog;
for (i = 0; i < num_progs; i++) {
- if (load_bpf_file(filename)) {
- fprintf(stderr, "%s", bpf_log_buf);
- return 1;
+ objs[i] = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(objs[i])) {
+ fprintf(stderr, "opening BPF object file failed\n");
+ objs[i] = NULL;
+ goto cleanup;
}
- printf("prog #%d: map ids %d %d\n", i, map_fd[0], map_fd[1]);
- map0_fds[i] = map_fd[0];
- map1_fds[i] = map_fd[1];
+
+ /* load BPF program */
+ if (bpf_object__load(objs[i])) {
+ fprintf(stderr, "loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map0_fds[i] = bpf_object__find_map_fd_by_name(objs[i],
+ "enter_open_map");
+ map1_fds[i] = bpf_object__find_map_fd_by_name(objs[i],
+ "exit_open_map");
+ if (map0_fds[i] < 0 || map1_fds[i] < 0) {
+ fprintf(stderr, "finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ bpf_object__for_each_program(prog, objs[i]) {
+ links[j] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[j])) {
+ fprintf(stderr, "bpf_program__attach failed\n");
+ links[j] = NULL;
+ goto cleanup;
+ }
+ j++;
+ }
+ printf("prog #%d: map ids %d %d\n", i, map0_fds[i], map1_fds[i]);
}
/* current load_bpf_file has perf_event_open default pid = -1
@@ -80,12 +106,17 @@ static int test(char *filename, int num_progs)
verify_map(map1_fds[i]);
}
+cleanup:
+ for (j--; j >= 0; j--)
+ bpf_link__destroy(links[j]);
+
+ for (i--; i >= 0; i--)
+ bpf_object__close(objs[i]);
return 0;
}
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
int opt, num_progs = 1;
char filename[256];
@@ -101,7 +132,6 @@ int main(int argc, char **argv)
}
}
- setrlimit(RLIMIT_MEMLOCK, &r);
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
return test(filename, num_progs);
diff --git a/samples/bpf/task_fd_query_kern.c b/samples/bpf/task_fd_query_kern.c
index 278ade5427c8..186ac0a79c0a 100644
--- a/samples/bpf/task_fd_query_kern.c
+++ b/samples/bpf/task_fd_query_kern.c
@@ -10,7 +10,7 @@ int bpf_prog1(struct pt_regs *ctx)
return 0;
}
-SEC("kretprobe/blk_account_io_completion")
+SEC("kretprobe/__blk_account_io_done")
int bpf_prog2(struct pt_regs *ctx)
{
return 0;
diff --git a/samples/bpf/task_fd_query_user.c b/samples/bpf/task_fd_query_user.c
index ff2e9c1c7266..a33d74bd3a4b 100644
--- a/samples/bpf/task_fd_query_user.c
+++ b/samples/bpf/task_fd_query_user.c
@@ -10,17 +10,19 @@
#include <fcntl.h>
#include <linux/bpf.h>
#include <sys/ioctl.h>
-#include <sys/resource.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <linux/perf_event.h>
+#include <bpf/bpf.h>
#include <bpf/libbpf.h>
-#include "bpf_load.h"
#include "bpf_util.h"
#include "perf-sys.h"
#include "trace_helpers.h"
+static struct bpf_program *progs[2];
+static struct bpf_link *links[2];
+
#define CHECK_PERROR_RET(condition) ({ \
int __ret = !!(condition); \
if (__ret) { \
@@ -86,21 +88,22 @@ static int bpf_get_retprobe_bit(const char *event_type)
return ret;
}
-static int test_debug_fs_kprobe(int prog_fd_idx, const char *fn_name,
+static int test_debug_fs_kprobe(int link_idx, const char *fn_name,
__u32 expected_fd_type)
{
__u64 probe_offset, probe_addr;
__u32 len, prog_id, fd_type;
+ int err, event_fd;
char buf[256];
- int err;
len = sizeof(buf);
- err = bpf_task_fd_query(getpid(), event_fd[prog_fd_idx], 0, buf, &len,
+ event_fd = bpf_link__fd(links[link_idx]);
+ err = bpf_task_fd_query(getpid(), event_fd, 0, buf, &len,
&prog_id, &fd_type, &probe_offset,
&probe_addr);
if (err < 0) {
printf("FAIL: %s, for event_fd idx %d, fn_name %s\n",
- __func__, prog_fd_idx, fn_name);
+ __func__, link_idx, fn_name);
perror(" :");
return -1;
}
@@ -108,7 +111,7 @@ static int test_debug_fs_kprobe(int prog_fd_idx, const char *fn_name,
fd_type != expected_fd_type ||
probe_offset != 0x0 || probe_addr != 0x0) {
printf("FAIL: bpf_trace_event_query(event_fd[%d]):\n",
- prog_fd_idx);
+ link_idx);
printf("buf: %s, fd_type: %u, probe_offset: 0x%llx,"
" probe_addr: 0x%llx\n",
buf, fd_type, probe_offset, probe_addr);
@@ -125,12 +128,13 @@ static int test_nondebug_fs_kuprobe_common(const char *event_type,
int is_return_bit = bpf_get_retprobe_bit(event_type);
int type = bpf_find_probe_type(event_type);
struct perf_event_attr attr = {};
- int fd;
+ struct bpf_link *link;
+ int fd, err = -1;
if (type < 0 || is_return_bit < 0) {
printf("FAIL: %s incorrect type (%d) or is_return_bit (%d)\n",
__func__, type, is_return_bit);
- return -1;
+ return err;
}
attr.sample_period = 1;
@@ -149,14 +153,21 @@ static int test_nondebug_fs_kuprobe_common(const char *event_type,
attr.type = type;
fd = sys_perf_event_open(&attr, -1, 0, -1, 0);
- CHECK_PERROR_RET(fd < 0);
+ link = bpf_program__attach_perf_event(progs[0], fd);
+ if (libbpf_get_error(link)) {
+ printf("ERROR: bpf_program__attach_perf_event failed\n");
+ link = NULL;
+ close(fd);
+ goto cleanup;
+ }
- CHECK_PERROR_RET(ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) < 0);
- CHECK_PERROR_RET(ioctl(fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) < 0);
CHECK_PERROR_RET(bpf_task_fd_query(getpid(), fd, 0, buf, buf_len,
prog_id, fd_type, probe_offset, probe_addr) < 0);
+ err = 0;
- return 0;
+cleanup:
+ bpf_link__destroy(link);
+ return err;
}
static int test_nondebug_fs_probe(const char *event_type, const char *name,
@@ -215,17 +226,18 @@ static int test_nondebug_fs_probe(const char *event_type, const char *name,
static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return)
{
+ char buf[256], event_alias[sizeof("test_1234567890")];
const char *event_type = "uprobe";
struct perf_event_attr attr = {};
- char buf[256], event_alias[sizeof("test_1234567890")];
__u64 probe_offset, probe_addr;
__u32 len, prog_id, fd_type;
- int err, res, kfd, efd;
+ int err = -1, res, kfd, efd;
+ struct bpf_link *link;
ssize_t bytes;
snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events",
event_type);
- kfd = open(buf, O_WRONLY | O_APPEND, 0);
+ kfd = open(buf, O_WRONLY | O_TRUNC, 0);
CHECK_PERROR_RET(kfd < 0);
res = snprintf(event_alias, sizeof(event_alias), "test_%d", getpid());
@@ -254,10 +266,15 @@ static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return)
attr.type = PERF_TYPE_TRACEPOINT;
attr.sample_period = 1;
attr.wakeup_events = 1;
+
kfd = sys_perf_event_open(&attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
- CHECK_PERROR_RET(kfd < 0);
- CHECK_PERROR_RET(ioctl(kfd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) < 0);
- CHECK_PERROR_RET(ioctl(kfd, PERF_EVENT_IOC_ENABLE, 0) < 0);
+ link = bpf_program__attach_perf_event(progs[0], kfd);
+ if (libbpf_get_error(link)) {
+ printf("ERROR: bpf_program__attach_perf_event failed\n");
+ link = NULL;
+ close(kfd);
+ goto cleanup;
+ }
len = sizeof(buf);
err = bpf_task_fd_query(getpid(), kfd, 0, buf, &len,
@@ -283,38 +300,55 @@ static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return)
probe_offset);
return -1;
}
+ err = 0;
- close(kfd);
- return 0;
+cleanup:
+ bpf_link__destroy(link);
+ return err;
}
int main(int argc, char **argv)
{
- struct rlimit r = {1024*1024, RLIM_INFINITY};
extern char __executable_start;
char filename[256], buf[256];
__u64 uprobe_file_offset;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ int i = 0, err = -1;
+
+ if (load_kallsyms()) {
+ printf("failed to process /proc/kallsyms\n");
+ return err;
+ }
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK)");
- return 1;
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return err;
}
- if (load_kallsyms()) {
- printf("failed to process /proc/kallsyms\n");
- return 1;
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
}
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ bpf_object__for_each_program(prog, obj) {
+ progs[i] = prog;
+ links[i] = bpf_program__attach(progs[i]);
+ if (libbpf_get_error(links[i])) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ links[i] = NULL;
+ goto cleanup;
+ }
+ i++;
}
/* test two functions in the corresponding *_kern.c file */
CHECK_AND_RET(test_debug_fs_kprobe(0, "blk_mq_start_request",
BPF_FD_TYPE_KPROBE));
- CHECK_AND_RET(test_debug_fs_kprobe(1, "blk_account_io_completion",
+ CHECK_AND_RET(test_debug_fs_kprobe(1, "__blk_account_io_done",
BPF_FD_TYPE_KRETPROBE));
/* test nondebug fs kprobe */
@@ -361,7 +395,7 @@ int main(int argc, char **argv)
* on different systems with different compilers. The right way is
* to parse the ELF file. We took a shortcut here.
*/
- uprobe_file_offset = (__u64)main - (__u64)&__executable_start;
+ uprobe_file_offset = (unsigned long)main - (unsigned long)&__executable_start;
CHECK_AND_RET(test_nondebug_fs_probe("uprobe", (char *)argv[0],
uprobe_file_offset, 0x0, false,
BPF_FD_TYPE_UPROBE,
@@ -378,6 +412,12 @@ int main(int argc, char **argv)
false));
CHECK_AND_RET(test_debug_fs_uprobe((char *)argv[0], uprobe_file_offset,
true));
+ err = 0;
- return 0;
+cleanup:
+ for (i--; i >= 0; i--)
+ bpf_link__destroy(links[i]);
+
+ bpf_object__close(obj);
+ return err;
}
diff --git a/samples/bpf/test_cgrp2_array_pin.c b/samples/bpf/test_cgrp2_array_pin.c
index 6d564aa75447..05e88aa63009 100644
--- a/samples/bpf/test_cgrp2_array_pin.c
+++ b/samples/bpf/test_cgrp2_array_pin.c
@@ -64,9 +64,9 @@ int main(int argc, char **argv)
}
if (create_array) {
- array_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_ARRAY,
+ array_fd = bpf_map_create(BPF_MAP_TYPE_CGROUP_ARRAY, NULL,
sizeof(uint32_t), sizeof(uint32_t),
- 1, 0);
+ 1, NULL);
if (array_fd < 0) {
fprintf(stderr,
"bpf_create_map(BPF_MAP_TYPE_CGROUP_ARRAY,...): %s(%d)\n",
diff --git a/samples/bpf/test_cgrp2_attach.c b/samples/bpf/test_cgrp2_attach.c
index 20fbd1241db3..68ce69457afe 100644
--- a/samples/bpf/test_cgrp2_attach.c
+++ b/samples/bpf/test_cgrp2_attach.c
@@ -31,6 +31,7 @@
#include <bpf/bpf.h>
#include "bpf_insn.h"
+#include "bpf_util.h"
enum {
MAP_KEY_PACKETS,
@@ -53,7 +54,7 @@ static int prog_load(int map_fd, int verdict)
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
BPF_MOV64_IMM(BPF_REG_1, 1), /* r1 = 1 */
- BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+ BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_0, BPF_REG_1, 0),
/* Count bytes */
BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_BYTES), /* r0 = 1 */
@@ -64,16 +65,20 @@ static int prog_load(int map_fd, int verdict)
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, offsetof(struct __sk_buff, len)), /* r1 = skb->len */
- BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+
+ BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_0, BPF_REG_1, 0),
BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
BPF_EXIT_INSN(),
};
- size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
-
- return bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
- prog, insns_cnt, "GPL", 0,
- bpf_log_buf, BPF_LOG_BUF_SIZE);
+ size_t insns_cnt = ARRAY_SIZE(prog);
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .log_buf = bpf_log_buf,
+ .log_size = BPF_LOG_BUF_SIZE,
+ );
+
+ return bpf_prog_load(BPF_PROG_TYPE_CGROUP_SKB, NULL, "GPL",
+ prog, insns_cnt, &opts);
}
static int usage(const char *argv0)
@@ -89,9 +94,9 @@ static int attach_filter(int cg_fd, int type, int verdict)
int prog_fd, map_fd, ret, key;
long long pkt_cnt, byte_cnt;
- map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY,
+ map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL,
sizeof(key), sizeof(byte_cnt),
- 256, 0);
+ 256, NULL);
if (map_fd < 0) {
printf("Failed to create map: '%s'\n", strerror(errno));
return EXIT_FAILURE;
diff --git a/samples/bpf/test_cgrp2_sock.c b/samples/bpf/test_cgrp2_sock.c
index b0811da5a00f..a0811df888f4 100644
--- a/samples/bpf/test_cgrp2_sock.c
+++ b/samples/bpf/test_cgrp2_sock.c
@@ -70,6 +70,10 @@ static int prog_load(__u32 idx, __u32 mark, __u32 prio)
BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, priority)),
BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, priority)),
};
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .log_buf = bpf_log_buf,
+ .log_size = BPF_LOG_BUF_SIZE,
+ );
struct bpf_insn *prog;
size_t insns_cnt;
@@ -115,8 +119,8 @@ static int prog_load(__u32 idx, __u32 mark, __u32 prio)
insns_cnt /= sizeof(struct bpf_insn);
- ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SOCK, prog, insns_cnt,
- "GPL", 0, bpf_log_buf, BPF_LOG_BUF_SIZE);
+ ret = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL",
+ prog, insns_cnt, &opts);
free(prog);
diff --git a/samples/bpf/test_cgrp2_sock2.c b/samples/bpf/test_cgrp2_sock2.c
index a9277b118c33..e7060aaa2f5a 100644
--- a/samples/bpf/test_cgrp2_sock2.c
+++ b/samples/bpf/test_cgrp2_sock2.c
@@ -20,9 +20,9 @@
#include <net/if.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
#include "bpf_insn.h"
-#include "bpf_load.h"
static int usage(const char *argv0)
{
@@ -32,37 +32,64 @@ static int usage(const char *argv0)
int main(int argc, char **argv)
{
- int cg_fd, ret, filter_id = 0;
+ int cg_fd, err, ret = EXIT_FAILURE, filter_id = 0, prog_cnt = 0;
+ const char *link_pin_path = "/sys/fs/bpf/test_cgrp2_sock2";
+ struct bpf_link *link = NULL;
+ struct bpf_program *progs[2];
+ struct bpf_program *prog;
+ struct bpf_object *obj;
if (argc < 3)
return usage(argv[0]);
+ if (argc > 3)
+ filter_id = atoi(argv[3]);
+
cg_fd = open(argv[1], O_DIRECTORY | O_RDONLY);
if (cg_fd < 0) {
printf("Failed to open cgroup path: '%s'\n", strerror(errno));
- return EXIT_FAILURE;
+ return ret;
}
- if (load_bpf_file(argv[2]))
- return EXIT_FAILURE;
-
- printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf);
+ obj = bpf_object__open_file(argv[2], NULL);
+ if (libbpf_get_error(obj)) {
+ printf("ERROR: opening BPF object file failed\n");
+ return ret;
+ }
- if (argc > 3)
- filter_id = atoi(argv[3]);
+ bpf_object__for_each_program(prog, obj) {
+ progs[prog_cnt] = prog;
+ prog_cnt++;
+ }
if (filter_id >= prog_cnt) {
printf("Invalid program id; program not found in file\n");
- return EXIT_FAILURE;
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ printf("ERROR: loading BPF object file failed\n");
+ goto cleanup;
}
- ret = bpf_prog_attach(prog_fd[filter_id], cg_fd,
- BPF_CGROUP_INET_SOCK_CREATE, 0);
- if (ret < 0) {
- printf("Failed to attach prog to cgroup: '%s'\n",
- strerror(errno));
- return EXIT_FAILURE;
+ link = bpf_program__attach_cgroup(progs[filter_id], cg_fd);
+ if (libbpf_get_error(link)) {
+ printf("ERROR: bpf_program__attach failed\n");
+ link = NULL;
+ goto cleanup;
}
- return EXIT_SUCCESS;
+ err = bpf_link__pin(link, link_pin_path);
+ if (err < 0) {
+ printf("ERROR: bpf_link__pin failed: %d\n", err);
+ goto cleanup;
+ }
+
+ ret = EXIT_SUCCESS;
+
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
+ return ret;
}
diff --git a/samples/bpf/test_cgrp2_sock2.sh b/samples/bpf/test_cgrp2_sock2.sh
index 0f396a86e0cb..6a3dbe642b2b 100755
--- a/samples/bpf/test_cgrp2_sock2.sh
+++ b/samples/bpf/test_cgrp2_sock2.sh
@@ -1,6 +1,9 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+BPFFS=/sys/fs/bpf
+LINK_PIN=$BPFFS/test_cgrp2_sock2
+
function config_device {
ip netns add at_ns0
ip link add veth0 type veth peer name veth0b
@@ -21,16 +24,22 @@ function config_cgroup {
echo $$ >> /tmp/cgroupv2/foo/cgroup.procs
}
+function config_bpffs {
+ if mount | grep $BPFFS > /dev/null; then
+ echo "bpffs already mounted"
+ else
+ echo "bpffs not mounted. Mounting..."
+ mount -t bpf none $BPFFS
+ fi
+}
function attach_bpf {
- test_cgrp2_sock2 /tmp/cgroupv2/foo sock_flags_kern.o $1
+ ./test_cgrp2_sock2 /tmp/cgroupv2/foo sock_flags_kern.o $1
[ $? -ne 0 ] && exit 1
}
function cleanup {
- if [ -d /tmp/cgroupv2/foo ]; then
- test_cgrp2_sock -d /tmp/cgroupv2/foo
- fi
+ rm -rf $LINK_PIN
ip link del veth0b
ip netns delete at_ns0
umount /tmp/cgroupv2
@@ -42,6 +51,7 @@ cleanup 2>/dev/null
set -e
config_device
config_cgroup
+config_bpffs
set +e
#
@@ -62,6 +72,9 @@ if [ $? -eq 0 ]; then
exit 1
fi
+rm -rf $LINK_PIN
+sleep 1 # Wait for link detach
+
#
# Test 2 - fail ping
#
diff --git a/samples/bpf/test_current_task_under_cgroup_kern.c b/samples/bpf/test_current_task_under_cgroup_kern.c
index 6dc4f41bb6cb..fbd43e2bb4d3 100644
--- a/samples/bpf/test_current_task_under_cgroup_kern.c
+++ b/samples/bpf/test_current_task_under_cgroup_kern.c
@@ -10,23 +10,24 @@
#include <linux/version.h>
#include <bpf/bpf_helpers.h>
#include <uapi/linux/utsname.h>
+#include "trace_common.h"
-struct bpf_map_def SEC("maps") cgroup_map = {
- .type = BPF_MAP_TYPE_CGROUP_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u32),
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_CGROUP_ARRAY);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(u32));
+ __uint(max_entries, 1);
+} cgroup_map SEC(".maps");
-struct bpf_map_def SEC("maps") perf_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u64),
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, u64);
+ __uint(max_entries, 1);
+} perf_map SEC(".maps");
/* Writes the last PID that called sync to a map at index 0 */
-SEC("kprobe/sys_sync")
+SEC("kprobe/" SYSCALL(sys_sync))
int bpf_prog1(struct pt_regs *ctx)
{
u64 pid = bpf_get_current_pid_tgid();
diff --git a/samples/bpf/test_current_task_under_cgroup_user.c b/samples/bpf/test_current_task_under_cgroup_user.c
index 06e9f8ce42e2..ac251a417f45 100644
--- a/samples/bpf/test_current_task_under_cgroup_user.c
+++ b/samples/bpf/test_current_task_under_cgroup_user.c
@@ -4,10 +4,9 @@
#define _GNU_SOURCE
#include <stdio.h>
-#include <linux/bpf.h>
#include <unistd.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
#include "cgroup_helpers.h"
#define CGROUP_PATH "/my-cgroup"
@@ -15,13 +14,44 @@
int main(int argc, char **argv)
{
pid_t remote_pid, local_pid = getpid();
- int cg2, idx = 0, rc = 0;
+ struct bpf_link *link = NULL;
+ struct bpf_program *prog;
+ int cg2, idx = 0, rc = 1;
+ struct bpf_object *obj;
char filename[256];
+ int map_fd[2];
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
+
+ prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+ if (!prog) {
+ printf("finding a prog in obj file failed\n");
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd[0] = bpf_object__find_map_fd_by_name(obj, "cgroup_map");
+ map_fd[1] = bpf_object__find_map_fd_by_name(obj, "perf_map");
+ if (map_fd[0] < 0 || map_fd[1] < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ link = bpf_program__attach(prog);
+ if (libbpf_get_error(link)) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ link = NULL;
+ goto cleanup;
}
if (setup_cgroup_environment())
@@ -70,12 +100,14 @@ int main(int argc, char **argv)
goto err;
}
- goto out;
-err:
- rc = 1;
+ rc = 0;
-out:
+err:
close(cg2);
cleanup_cgroup_environment();
+
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
return rc;
}
diff --git a/samples/bpf/test_ipip.sh b/samples/bpf/test_ipip.sh
deleted file mode 100755
index 9e507c305c6e..000000000000
--- a/samples/bpf/test_ipip.sh
+++ /dev/null
@@ -1,179 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-function config_device {
- ip netns add at_ns0
- ip netns add at_ns1
- ip netns add at_ns2
- ip link add veth0 type veth peer name veth0b
- ip link add veth1 type veth peer name veth1b
- ip link add veth2 type veth peer name veth2b
- ip link set veth0b up
- ip link set veth1b up
- ip link set veth2b up
- ip link set dev veth0b mtu 1500
- ip link set dev veth1b mtu 1500
- ip link set dev veth2b mtu 1500
- ip link set veth0 netns at_ns0
- ip link set veth1 netns at_ns1
- ip link set veth2 netns at_ns2
- ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
- ip netns exec at_ns0 ip addr add 2401:db00::1/64 dev veth0 nodad
- ip netns exec at_ns0 ip link set dev veth0 up
- ip netns exec at_ns1 ip addr add 172.16.1.101/24 dev veth1
- ip netns exec at_ns1 ip addr add 2401:db00::2/64 dev veth1 nodad
- ip netns exec at_ns1 ip link set dev veth1 up
- ip netns exec at_ns2 ip addr add 172.16.1.200/24 dev veth2
- ip netns exec at_ns2 ip addr add 2401:db00::3/64 dev veth2 nodad
- ip netns exec at_ns2 ip link set dev veth2 up
- ip link add br0 type bridge
- ip link set br0 up
- ip link set dev br0 mtu 1500
- ip link set veth0b master br0
- ip link set veth1b master br0
- ip link set veth2b master br0
-}
-
-function add_ipip_tunnel {
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type ipip local 172.16.1.100 remote 172.16.1.200
- ip netns exec at_ns0 ip link set dev $DEV_NS up
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
- ip netns exec at_ns1 \
- ip link add dev $DEV_NS type ipip local 172.16.1.101 remote 172.16.1.200
- ip netns exec at_ns1 ip link set dev $DEV_NS up
- # same inner IP address in at_ns0 and at_ns1
- ip netns exec at_ns1 ip addr add dev $DEV_NS 10.1.1.100/24
-
- ip netns exec at_ns2 ip link add dev $DEV type ipip external
- ip netns exec at_ns2 ip link set dev $DEV up
- ip netns exec at_ns2 ip addr add dev $DEV 10.1.1.200/24
-}
-
-function add_ipip6_tunnel {
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type ip6tnl mode ipip6 local 2401:db00::1/64 remote 2401:db00::3/64
- ip netns exec at_ns0 ip link set dev $DEV_NS up
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
- ip netns exec at_ns1 \
- ip link add dev $DEV_NS type ip6tnl mode ipip6 local 2401:db00::2/64 remote 2401:db00::3/64
- ip netns exec at_ns1 ip link set dev $DEV_NS up
- # same inner IP address in at_ns0 and at_ns1
- ip netns exec at_ns1 ip addr add dev $DEV_NS 10.1.1.100/24
-
- ip netns exec at_ns2 ip link add dev $DEV type ip6tnl mode ipip6 external
- ip netns exec at_ns2 ip link set dev $DEV up
- ip netns exec at_ns2 ip addr add dev $DEV 10.1.1.200/24
-}
-
-function add_ip6ip6_tunnel {
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type ip6tnl mode ip6ip6 local 2401:db00::1/64 remote 2401:db00::3/64
- ip netns exec at_ns0 ip link set dev $DEV_NS up
- ip netns exec at_ns0 ip addr add dev $DEV_NS 2601:646::1/64
- ip netns exec at_ns1 \
- ip link add dev $DEV_NS type ip6tnl mode ip6ip6 local 2401:db00::2/64 remote 2401:db00::3/64
- ip netns exec at_ns1 ip link set dev $DEV_NS up
- # same inner IP address in at_ns0 and at_ns1
- ip netns exec at_ns1 ip addr add dev $DEV_NS 2601:646::1/64
-
- ip netns exec at_ns2 ip link add dev $DEV type ip6tnl mode ip6ip6 external
- ip netns exec at_ns2 ip link set dev $DEV up
- ip netns exec at_ns2 ip addr add dev $DEV 2601:646::2/64
-}
-
-function attach_bpf {
- DEV=$1
- SET_TUNNEL=$2
- GET_TUNNEL=$3
- ip netns exec at_ns2 tc qdisc add dev $DEV clsact
- ip netns exec at_ns2 tc filter add dev $DEV egress bpf da obj tcbpf2_kern.o sec $SET_TUNNEL
- ip netns exec at_ns2 tc filter add dev $DEV ingress bpf da obj tcbpf2_kern.o sec $GET_TUNNEL
-}
-
-function test_ipip {
- DEV_NS=ipip_std
- DEV=ipip_bpf
- config_device
-# tcpdump -nei br0 &
- cat /sys/kernel/debug/tracing/trace_pipe &
-
- add_ipip_tunnel
- attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel
-
- ip netns exec at_ns0 ping -c 1 10.1.1.200
- ip netns exec at_ns2 ping -c 1 10.1.1.100
- ip netns exec at_ns0 iperf -sD -p 5200 > /dev/null
- ip netns exec at_ns1 iperf -sD -p 5201 > /dev/null
- sleep 0.2
- # tcp check _same_ IP over different tunnels
- ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5200
- ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5201
- cleanup
-}
-
-# IPv4 over IPv6 tunnel
-function test_ipip6 {
- DEV_NS=ipip_std
- DEV=ipip_bpf
- config_device
-# tcpdump -nei br0 &
- cat /sys/kernel/debug/tracing/trace_pipe &
-
- add_ipip6_tunnel
- attach_bpf $DEV ipip6_set_tunnel ipip6_get_tunnel
-
- ip netns exec at_ns0 ping -c 1 10.1.1.200
- ip netns exec at_ns2 ping -c 1 10.1.1.100
- ip netns exec at_ns0 iperf -sD -p 5200 > /dev/null
- ip netns exec at_ns1 iperf -sD -p 5201 > /dev/null
- sleep 0.2
- # tcp check _same_ IP over different tunnels
- ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5200
- ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5201
- cleanup
-}
-
-# IPv6 over IPv6 tunnel
-function test_ip6ip6 {
- DEV_NS=ipip_std
- DEV=ipip_bpf
- config_device
-# tcpdump -nei br0 &
- cat /sys/kernel/debug/tracing/trace_pipe &
-
- add_ip6ip6_tunnel
- attach_bpf $DEV ip6ip6_set_tunnel ip6ip6_get_tunnel
-
- ip netns exec at_ns0 ping -6 -c 1 2601:646::2
- ip netns exec at_ns2 ping -6 -c 1 2601:646::1
- ip netns exec at_ns0 iperf -6sD -p 5200 > /dev/null
- ip netns exec at_ns1 iperf -6sD -p 5201 > /dev/null
- sleep 0.2
- # tcp check _same_ IP over different tunnels
- ip netns exec at_ns2 iperf -6c 2601:646::1 -n 5k -p 5200
- ip netns exec at_ns2 iperf -6c 2601:646::1 -n 5k -p 5201
- cleanup
-}
-
-function cleanup {
- set +ex
- pkill iperf
- ip netns delete at_ns0
- ip netns delete at_ns1
- ip netns delete at_ns2
- ip link del veth0
- ip link del veth1
- ip link del veth2
- ip link del br0
- pkill tcpdump
- pkill cat
- set -ex
-}
-
-cleanup
-echo "Testing IP tunnels..."
-test_ipip
-test_ipip6
-test_ip6ip6
-echo "*** PASS ***"
diff --git a/samples/bpf/test_lru_dist.c b/samples/bpf/test_lru_dist.c
index b313dba4111b..5efb91763d65 100644
--- a/samples/bpf/test_lru_dist.c
+++ b/samples/bpf/test_lru_dist.c
@@ -13,7 +13,6 @@
#include <sched.h>
#include <sys/wait.h>
#include <sys/stat.h>
-#include <sys/resource.h>
#include <fcntl.h>
#include <stdlib.h>
#include <time.h>
@@ -105,10 +104,10 @@ struct pfect_lru {
static void pfect_lru_init(struct pfect_lru *lru, unsigned int lru_size,
unsigned int nr_possible_elems)
{
- lru->map_fd = bpf_create_map(BPF_MAP_TYPE_HASH,
+ lru->map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL,
sizeof(unsigned long long),
sizeof(struct pfect_lru_node *),
- nr_possible_elems, 0);
+ nr_possible_elems, NULL);
assert(lru->map_fd != -1);
lru->free_nodes = malloc(lru_size * sizeof(struct pfect_lru_node));
@@ -207,10 +206,13 @@ static unsigned int read_keys(const char *dist_file,
static int create_map(int map_type, int map_flags, unsigned int size)
{
+ LIBBPF_OPTS(bpf_map_create_opts, opts,
+ .map_flags = map_flags,
+ );
int map_fd;
- map_fd = bpf_create_map(map_type, sizeof(unsigned long long),
- sizeof(unsigned long long), size, map_flags);
+ map_fd = bpf_map_create(map_type, NULL, sizeof(unsigned long long),
+ sizeof(unsigned long long), size, &opts);
if (map_fd == -1)
perror("bpf_create_map");
@@ -489,7 +491,6 @@ static void test_parallel_lru_loss(int map_type, int map_flags, int nr_tasks)
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
int map_flags[] = {0, BPF_F_NO_COMMON_LRU};
const char *dist_file;
int nr_tasks = 1;
@@ -508,8 +509,6 @@ int main(int argc, char **argv)
setbuf(stdout, NULL);
- assert(!setrlimit(RLIMIT_MEMLOCK, &r));
-
srand(time(NULL));
nr_cpus = bpf_num_possible_cpus();
@@ -524,7 +523,7 @@ int main(int argc, char **argv)
return -1;
}
- for (f = 0; f < sizeof(map_flags) / sizeof(*map_flags); f++) {
+ for (f = 0; f < ARRAY_SIZE(map_flags); f++) {
test_lru_loss0(BPF_MAP_TYPE_LRU_HASH, map_flags[f]);
test_lru_loss1(BPF_MAP_TYPE_LRU_HASH, map_flags[f]);
test_parallel_lru_loss(BPF_MAP_TYPE_LRU_HASH, map_flags[f],
diff --git a/samples/bpf/test_lwt_bpf.sh b/samples/bpf/test_lwt_bpf.sh
index 65a976058dd3..65a976058dd3 100644..100755
--- a/samples/bpf/test_lwt_bpf.sh
+++ b/samples/bpf/test_lwt_bpf.sh
diff --git a/samples/bpf/test_map_in_map_kern.c b/samples/bpf/test_map_in_map_kern.c
index 6cee61e8ce9b..b0200c8eac09 100644
--- a/samples/bpf/test_map_in_map_kern.c
+++ b/samples/bpf/test_map_in_map_kern.c
@@ -11,66 +11,67 @@
#include <uapi/linux/bpf.h>
#include <uapi/linux/in6.h>
#include <bpf/bpf_helpers.h>
-#include "bpf_legacy.h"
#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "trace_common.h"
#define MAX_NR_PORTS 65536
/* map #0 */
-struct bpf_map_def_legacy SEC("maps") port_a = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(int),
- .max_entries = MAX_NR_PORTS,
-};
+struct inner_a {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, int);
+ __uint(max_entries, MAX_NR_PORTS);
+} port_a SEC(".maps");
/* map #1 */
-struct bpf_map_def_legacy SEC("maps") port_h = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(int),
- .max_entries = 1,
-};
+struct inner_h {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, int);
+ __uint(max_entries, 1);
+} port_h SEC(".maps");
/* map #2 */
-struct bpf_map_def_legacy SEC("maps") reg_result_h = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(int),
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, int);
+ __uint(max_entries, 1);
+} reg_result_h SEC(".maps");
/* map #3 */
-struct bpf_map_def_legacy SEC("maps") inline_result_h = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(u32),
- .value_size = sizeof(int),
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, int);
+ __uint(max_entries, 1);
+} inline_result_h SEC(".maps");
/* map #4 */ /* Test case #0 */
-struct bpf_map_def_legacy SEC("maps") a_of_port_a = {
- .type = BPF_MAP_TYPE_ARRAY_OF_MAPS,
- .key_size = sizeof(u32),
- .inner_map_idx = 0, /* map_fd[0] is port_a */
- .max_entries = MAX_NR_PORTS,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, MAX_NR_PORTS);
+ __uint(key_size, sizeof(u32));
+ __array(values, struct inner_a); /* use inner_a as inner map */
+} a_of_port_a SEC(".maps");
/* map #5 */ /* Test case #1 */
-struct bpf_map_def_legacy SEC("maps") h_of_port_a = {
- .type = BPF_MAP_TYPE_HASH_OF_MAPS,
- .key_size = sizeof(u32),
- .inner_map_idx = 0, /* map_fd[0] is port_a */
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(u32));
+ __array(values, struct inner_a); /* use inner_a as inner map */
+} h_of_port_a SEC(".maps");
/* map #6 */ /* Test case #2 */
-struct bpf_map_def_legacy SEC("maps") h_of_port_h = {
- .type = BPF_MAP_TYPE_HASH_OF_MAPS,
- .key_size = sizeof(u32),
- .inner_map_idx = 1, /* map_fd[1] is port_h */
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(u32));
+ __array(values, struct inner_h); /* use inner_h as inner map */
+} h_of_port_h SEC(".maps");
static __always_inline int do_reg_lookup(void *inner_map, u32 port)
{
@@ -102,7 +103,7 @@ static __always_inline int do_inline_hash_lookup(void *inner_map, u32 port)
return result ? *result : -ENOENT;
}
-SEC("kprobe/sys_connect")
+SEC("kprobe/__sys_connect")
int trace_sys_connect(struct pt_regs *ctx)
{
struct sockaddr_in6 *in6;
@@ -112,8 +113,8 @@ int trace_sys_connect(struct pt_regs *ctx)
void *outer_map, *inner_map;
bool inline_hash = false;
- in6 = (struct sockaddr_in6 *)PT_REGS_PARM2(ctx);
- addrlen = (int)PT_REGS_PARM3(ctx);
+ in6 = (struct sockaddr_in6 *)PT_REGS_PARM2_CORE(ctx);
+ addrlen = (int)PT_REGS_PARM3_CORE(ctx);
if (addrlen != sizeof(*in6))
return 0;
diff --git a/samples/bpf/test_map_in_map_user.c b/samples/bpf/test_map_in_map_user.c
index eb29bcb76f3f..652ec720533d 100644
--- a/samples/bpf/test_map_in_map_user.c
+++ b/samples/bpf/test_map_in_map_user.c
@@ -2,7 +2,6 @@
/*
* Copyright (c) 2017 Facebook
*/
-#include <sys/resource.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <stdint.h>
@@ -11,7 +10,11 @@
#include <stdlib.h>
#include <stdio.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
+
+#include "bpf_util.h"
+
+static int map_fd[7];
#define PORT_A (map_fd[0])
#define PORT_H (map_fd[1])
@@ -27,7 +30,7 @@ static const char * const test_names[] = {
"Hash of Hash",
};
-#define NR_TESTS (sizeof(test_names) / sizeof(*test_names))
+#define NR_TESTS ARRAY_SIZE(test_names)
static void check_map_id(int inner_map_fd, int map_in_map_fd, uint32_t key)
{
@@ -112,19 +115,54 @@ static void test_map_in_map(void)
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ struct bpf_link *link = NULL;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
char filename[256];
- assert(!setrlimit(RLIMIT_MEMLOCK, &r));
-
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
+
+ prog = bpf_object__find_program_by_name(obj, "trace_sys_connect");
+ if (!prog) {
+ printf("finding a prog in obj file failed\n");
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd[0] = bpf_object__find_map_fd_by_name(obj, "port_a");
+ map_fd[1] = bpf_object__find_map_fd_by_name(obj, "port_h");
+ map_fd[2] = bpf_object__find_map_fd_by_name(obj, "reg_result_h");
+ map_fd[3] = bpf_object__find_map_fd_by_name(obj, "inline_result_h");
+ map_fd[4] = bpf_object__find_map_fd_by_name(obj, "a_of_port_a");
+ map_fd[5] = bpf_object__find_map_fd_by_name(obj, "h_of_port_a");
+ map_fd[6] = bpf_object__find_map_fd_by_name(obj, "h_of_port_h");
+ if (map_fd[0] < 0 || map_fd[1] < 0 || map_fd[2] < 0 ||
+ map_fd[3] < 0 || map_fd[4] < 0 || map_fd[5] < 0 || map_fd[6] < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ link = bpf_program__attach(prog);
+ if (libbpf_get_error(link)) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ link = NULL;
+ goto cleanup;
}
test_map_in_map();
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/test_overhead_kprobe_kern.c b/samples/bpf/test_overhead_kprobe_kern.c
index 8b811c29dc79..8fdd2c9c56b2 100644
--- a/samples/bpf/test_overhead_kprobe_kern.c
+++ b/samples/bpf/test_overhead_kprobe_kern.c
@@ -6,27 +6,34 @@
*/
#include <linux/version.h>
#include <linux/ptrace.h>
+#include <linux/sched.h>
#include <uapi/linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;})
+#define _(P) \
+ ({ \
+ typeof(P) val = 0; \
+ bpf_probe_read_kernel(&val, sizeof(val), &(P)); \
+ val; \
+ })
SEC("kprobe/__set_task_comm")
int prog(struct pt_regs *ctx)
{
struct signal_struct *signal;
struct task_struct *tsk;
- char oldcomm[16] = {};
- char newcomm[16] = {};
+ char oldcomm[TASK_COMM_LEN] = {};
+ char newcomm[TASK_COMM_LEN] = {};
u16 oom_score_adj;
u32 pid;
tsk = (void *)PT_REGS_PARM1(ctx);
pid = _(tsk->pid);
- bpf_probe_read(oldcomm, sizeof(oldcomm), &tsk->comm);
- bpf_probe_read(newcomm, sizeof(newcomm), (void *)PT_REGS_PARM2(ctx));
+ bpf_probe_read_kernel_str(oldcomm, sizeof(oldcomm), &tsk->comm);
+ bpf_probe_read_kernel_str(newcomm, sizeof(newcomm),
+ (void *)PT_REGS_PARM2(ctx));
signal = _(tsk->signal);
oom_score_adj = _(signal->oom_score_adj);
return 0;
diff --git a/samples/bpf/test_overhead_tp_kern.c b/samples/bpf/test_overhead_tp_kern.c
index eaa32693f8fc..80edadacb692 100644
--- a/samples/bpf/test_overhead_tp_kern.c
+++ b/samples/bpf/test_overhead_tp_kern.c
@@ -4,6 +4,7 @@
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
+#include <linux/sched.h>
#include <uapi/linux/bpf.h>
#include <bpf/bpf_helpers.h>
@@ -11,8 +12,8 @@
struct task_rename {
__u64 pad;
__u32 pid;
- char oldcomm[16];
- char newcomm[16];
+ char oldcomm[TASK_COMM_LEN];
+ char newcomm[TASK_COMM_LEN];
__u16 oom_score_adj;
};
SEC("tracepoint/task/task_rename")
diff --git a/samples/bpf/test_overhead_user.c b/samples/bpf/test_overhead_user.c
index 94f74112a20e..88717f8ec6ac 100644
--- a/samples/bpf/test_overhead_user.c
+++ b/samples/bpf/test_overhead_user.c
@@ -16,12 +16,15 @@
#include <linux/bpf.h>
#include <string.h>
#include <time.h>
-#include <sys/resource.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
#define MAX_CNT 1000000
+static struct bpf_link *links[2];
+static struct bpf_object *obj;
+static int cnt;
+
static __u64 time_get_ns(void)
{
struct timespec ts;
@@ -115,22 +118,54 @@ static void run_perf_test(int tasks, int flags)
}
}
+static int load_progs(char *filename)
+{
+ struct bpf_program *prog;
+ int err = 0;
+
+ obj = bpf_object__open_file(filename, NULL);
+ err = libbpf_get_error(obj);
+ if (err < 0) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return err;
+ }
+
+ /* load BPF program */
+ err = bpf_object__load(obj);
+ if (err < 0) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ return err;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ links[cnt] = bpf_program__attach(prog);
+ err = libbpf_get_error(links[cnt]);
+ if (err < 0) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ links[cnt] = NULL;
+ return err;
+ }
+ cnt++;
+ }
+
+ return err;
+}
+
static void unload_progs(void)
{
- close(prog_fd[0]);
- close(prog_fd[1]);
- close(event_fd[0]);
- close(event_fd[1]);
+ while (cnt)
+ bpf_link__destroy(links[--cnt]);
+
+ bpf_object__close(obj);
}
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
- char filename[256];
- int num_cpu = 8;
+ int num_cpu = sysconf(_SC_NPROCESSORS_ONLN);
int test_flags = ~0;
+ char filename[256];
+ int err = 0;
- setrlimit(RLIMIT_MEMLOCK, &r);
if (argc > 1)
test_flags = atoi(argv[1]) ? : test_flags;
@@ -145,38 +180,36 @@ int main(int argc, char **argv)
if (test_flags & 0xC) {
snprintf(filename, sizeof(filename),
"%s_kprobe_kern.o", argv[0]);
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
- }
+
printf("w/KPROBE\n");
- run_perf_test(num_cpu, test_flags >> 2);
+ err = load_progs(filename);
+ if (!err)
+ run_perf_test(num_cpu, test_flags >> 2);
+
unload_progs();
}
if (test_flags & 0x30) {
snprintf(filename, sizeof(filename),
"%s_tp_kern.o", argv[0]);
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
- }
printf("w/TRACEPOINT\n");
- run_perf_test(num_cpu, test_flags >> 4);
+ err = load_progs(filename);
+ if (!err)
+ run_perf_test(num_cpu, test_flags >> 4);
+
unload_progs();
}
if (test_flags & 0xC0) {
snprintf(filename, sizeof(filename),
"%s_raw_tp_kern.o", argv[0]);
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
- }
printf("w/RAW_TRACEPOINT\n");
- run_perf_test(num_cpu, test_flags >> 6);
+ err = load_progs(filename);
+ if (!err)
+ run_perf_test(num_cpu, test_flags >> 6);
+
unload_progs();
}
- return 0;
+ return err;
}
diff --git a/samples/bpf/test_override_return.sh b/samples/bpf/test_override_return.sh
index e68b9ee6814b..35db26f736b9 100755
--- a/samples/bpf/test_override_return.sh
+++ b/samples/bpf/test_override_return.sh
@@ -1,5 +1,6 @@
#!/bin/bash
+rm -r tmpmnt
rm -f testfile.img
dd if=/dev/zero of=testfile.img bs=1M seek=1000 count=1
DEVICE=$(losetup --show -f testfile.img)
diff --git a/samples/bpf/test_probe_write_user_kern.c b/samples/bpf/test_probe_write_user_kern.c
index f033f36a13a3..220a96438d75 100644
--- a/samples/bpf/test_probe_write_user_kern.c
+++ b/samples/bpf/test_probe_write_user_kern.c
@@ -10,13 +10,15 @@
#include <linux/version.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "trace_common.h"
-struct bpf_map_def SEC("maps") dnat_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(struct sockaddr_in),
- .value_size = sizeof(struct sockaddr_in),
- .max_entries = 256,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, struct sockaddr_in);
+ __type(value, struct sockaddr_in);
+ __uint(max_entries, 256);
+} dnat_map SEC(".maps");
/* kprobe is NOT a stable ABI
* kernel functions can be removed, renamed or completely change semantics.
@@ -26,13 +28,14 @@ struct bpf_map_def SEC("maps") dnat_map = {
* This example sits on a syscall, and the syscall ABI is relatively stable
* of course, across platforms, and over time, the ABI may change.
*/
-SEC("kprobe/sys_connect")
+SEC("kprobe/" SYSCALL(sys_connect))
int bpf_prog1(struct pt_regs *ctx)
{
+ struct pt_regs *real_regs = (struct pt_regs *)PT_REGS_PARM1_CORE(ctx);
+ void *sockaddr_arg = (void *)PT_REGS_PARM2_CORE(real_regs);
+ int sockaddr_len = (int)PT_REGS_PARM3_CORE(real_regs);
struct sockaddr_in new_addr, orig_addr = {};
struct sockaddr_in *mapped_addr;
- void *sockaddr_arg = (void *)PT_REGS_PARM2(ctx);
- int sockaddr_len = (int)PT_REGS_PARM3(ctx);
if (sockaddr_len > sizeof(orig_addr))
return 0;
diff --git a/samples/bpf/test_probe_write_user_user.c b/samples/bpf/test_probe_write_user_user.c
index 045eb5e30f54..00ccfb834e45 100644
--- a/samples/bpf/test_probe_write_user_user.c
+++ b/samples/bpf/test_probe_write_user_user.c
@@ -1,21 +1,22 @@
// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include <assert.h>
-#include <linux/bpf.h>
#include <unistd.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
#include <sys/socket.h>
-#include <string.h>
#include <netinet/in.h>
#include <arpa/inet.h>
int main(int ac, char **argv)
{
- int serverfd, serverconnfd, clientfd;
- socklen_t sockaddr_len;
- struct sockaddr serv_addr, mapped_addr, tmp_addr;
struct sockaddr_in *serv_addr_in, *mapped_addr_in, *tmp_addr_in;
+ struct sockaddr serv_addr, mapped_addr, tmp_addr;
+ int serverfd, serverconnfd, clientfd, map_fd;
+ struct bpf_link *link = NULL;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ socklen_t sockaddr_len;
char filename[256];
char *ip;
@@ -24,10 +25,35 @@ int main(int ac, char **argv)
tmp_addr_in = (struct sockaddr_in *)&tmp_addr;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
+
+ prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+ if (libbpf_get_error(prog)) {
+ fprintf(stderr, "ERROR: finding a prog in obj file failed\n");
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd = bpf_object__find_map_fd_by_name(obj, "dnat_map");
+ if (map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ link = bpf_program__attach(prog);
+ if (libbpf_get_error(link)) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ link = NULL;
+ goto cleanup;
}
assert((serverfd = socket(AF_INET, SOCK_STREAM, 0)) > 0);
@@ -51,7 +77,7 @@ int main(int ac, char **argv)
mapped_addr_in->sin_port = htons(5555);
mapped_addr_in->sin_addr.s_addr = inet_addr("255.255.255.255");
- assert(!bpf_map_update_elem(map_fd[0], &mapped_addr, &serv_addr, BPF_ANY));
+ assert(!bpf_map_update_elem(map_fd, &mapped_addr, &serv_addr, BPF_ANY));
assert(listen(serverfd, 5) == 0);
@@ -75,5 +101,8 @@ int main(int ac, char **argv)
/* Is the server's getsockname = the socket getpeername */
assert(memcmp(&serv_addr, &tmp_addr, sizeof(struct sockaddr_in)) == 0);
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/trace_common.h b/samples/bpf/trace_common.h
new file mode 100644
index 000000000000..8cb5400aed1f
--- /dev/null
+++ b/samples/bpf/trace_common.h
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef __TRACE_COMMON_H
+#define __TRACE_COMMON_H
+
+#ifdef __x86_64__
+#define SYSCALL(SYS) "__x64_" __stringify(SYS)
+#elif defined(__s390x__)
+#define SYSCALL(SYS) "__s390x_" __stringify(SYS)
+#else
+#define SYSCALL(SYS) __stringify(SYS)
+#endif
+
+#endif
diff --git a/samples/bpf/trace_event_kern.c b/samples/bpf/trace_event_kern.c
index da1d69e20645..0bba5fcd7d24 100644
--- a/samples/bpf/trace_event_kern.c
+++ b/samples/bpf/trace_event_kern.c
@@ -5,7 +5,6 @@
* License as published by the Free Software Foundation.
*/
#include <linux/ptrace.h>
-#include <linux/version.h>
#include <uapi/linux/bpf.h>
#include <uapi/linux/bpf_perf_event.h>
#include <uapi/linux/perf_event.h>
@@ -18,19 +17,19 @@ struct key_t {
u32 userstack;
};
-struct bpf_map_def SEC("maps") counts = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(struct key_t),
- .value_size = sizeof(u64),
- .max_entries = 10000,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, struct key_t);
+ __type(value, u64);
+ __uint(max_entries, 10000);
+} counts SEC(".maps");
-struct bpf_map_def SEC("maps") stackmap = {
- .type = BPF_MAP_TYPE_STACK_TRACE,
- .key_size = sizeof(u32),
- .value_size = PERF_MAX_STACK_DEPTH * sizeof(u64),
- .max_entries = 10000,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK_TRACE);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64));
+ __uint(max_entries, 10000);
+} stackmap SEC(".maps");
#define KERN_STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP)
#define USER_STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP | BPF_F_USER_STACK)
diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c
index 356171bc392b..9664749bf618 100644
--- a/samples/bpf/trace_event_user.c
+++ b/samples/bpf/trace_event_user.c
@@ -6,22 +6,22 @@
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
-#include <fcntl.h>
-#include <poll.h>
-#include <sys/ioctl.h>
#include <linux/perf_event.h>
#include <linux/bpf.h>
#include <signal.h>
-#include <assert.h>
#include <errno.h>
#include <sys/resource.h>
+#include <bpf/bpf.h>
#include <bpf/libbpf.h>
-#include "bpf_load.h"
#include "perf-sys.h"
#include "trace_helpers.h"
#define SAMPLE_FREQ 50
+static int pid;
+/* counts, stackmap */
+static int map_fd[2];
+struct bpf_program *prog;
static bool sys_read_seen, sys_write_seen;
static void print_ksym(__u64 addr)
@@ -91,10 +91,10 @@ static void print_stack(struct key_t *key, __u64 count)
}
}
-static void int_exit(int sig)
+static void err_exit(int err)
{
- kill(0, SIGKILL);
- exit(0);
+ kill(pid, SIGKILL);
+ exit(err);
}
static void print_stacks(void)
@@ -102,7 +102,7 @@ static void print_stacks(void)
struct key_t key = {}, next_key;
__u64 value;
__u32 stackid = 0, next_id;
- int fd = map_fd[0], stack_map = map_fd[1];
+ int error = 1, fd = map_fd[0], stack_map = map_fd[1];
sys_read_seen = sys_write_seen = false;
while (bpf_map_get_next_key(fd, &key, &next_key) == 0) {
@@ -114,7 +114,7 @@ static void print_stacks(void)
printf("\n");
if (!sys_read_seen || !sys_write_seen) {
printf("BUG kernel stack doesn't contain sys_read() and sys_write()\n");
- int_exit(0);
+ err_exit(error);
}
/* clear stack map */
@@ -136,43 +136,52 @@ static inline int generate_load(void)
static void test_perf_event_all_cpu(struct perf_event_attr *attr)
{
- int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
- int *pmu_fd = malloc(nr_cpus * sizeof(int));
- int i, error = 0;
+ int nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+ struct bpf_link **links = calloc(nr_cpus, sizeof(struct bpf_link *));
+ int i, pmu_fd, error = 1;
+
+ if (!links) {
+ printf("malloc of links failed\n");
+ goto err;
+ }
/* system wide perf event, no need to inherit */
attr->inherit = 0;
/* open perf_event on all cpus */
for (i = 0; i < nr_cpus; i++) {
- pmu_fd[i] = sys_perf_event_open(attr, -1, i, -1, 0);
- if (pmu_fd[i] < 0) {
+ pmu_fd = sys_perf_event_open(attr, -1, i, -1, 0);
+ if (pmu_fd < 0) {
printf("sys_perf_event_open failed\n");
- error = 1;
goto all_cpu_err;
}
- assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0);
- assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE) == 0);
+ links[i] = bpf_program__attach_perf_event(prog, pmu_fd);
+ if (libbpf_get_error(links[i])) {
+ printf("bpf_program__attach_perf_event failed\n");
+ links[i] = NULL;
+ close(pmu_fd);
+ goto all_cpu_err;
+ }
}
- if (generate_load() < 0) {
- error = 1;
+ if (generate_load() < 0)
goto all_cpu_err;
- }
+
print_stacks();
+ error = 0;
all_cpu_err:
- for (i--; i >= 0; i--) {
- ioctl(pmu_fd[i], PERF_EVENT_IOC_DISABLE);
- close(pmu_fd[i]);
- }
- free(pmu_fd);
+ for (i--; i >= 0; i--)
+ bpf_link__destroy(links[i]);
+err:
+ free(links);
if (error)
- int_exit(0);
+ err_exit(error);
}
static void test_perf_event_task(struct perf_event_attr *attr)
{
- int pmu_fd, error = 0;
+ struct bpf_link *link = NULL;
+ int pmu_fd, error = 1;
/* per task perf event, enable inherit so the "dd ..." command can be traced properly.
* Enabling inherit will cause bpf_perf_prog_read_time helper failure.
@@ -183,21 +192,25 @@ static void test_perf_event_task(struct perf_event_attr *attr)
pmu_fd = sys_perf_event_open(attr, 0, -1, -1, 0);
if (pmu_fd < 0) {
printf("sys_perf_event_open failed\n");
- int_exit(0);
+ goto err;
}
- assert(ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0);
- assert(ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE) == 0);
-
- if (generate_load() < 0) {
- error = 1;
+ link = bpf_program__attach_perf_event(prog, pmu_fd);
+ if (libbpf_get_error(link)) {
+ printf("bpf_program__attach_perf_event failed\n");
+ link = NULL;
+ close(pmu_fd);
goto err;
}
+
+ if (generate_load() < 0)
+ goto err;
+
print_stacks();
+ error = 0;
err:
- ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
- close(pmu_fd);
+ bpf_link__destroy(link);
if (error)
- int_exit(0);
+ err_exit(error);
}
static void test_bpf_perf_event(void)
@@ -281,30 +294,59 @@ static void test_bpf_perf_event(void)
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ struct bpf_object *obj = NULL;
char filename[256];
+ int error = 1;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- setrlimit(RLIMIT_MEMLOCK, &r);
- signal(SIGINT, int_exit);
- signal(SIGTERM, int_exit);
+ signal(SIGINT, err_exit);
+ signal(SIGTERM, err_exit);
if (load_kallsyms()) {
printf("failed to process /proc/kallsyms\n");
- return 1;
+ goto cleanup;
+ }
+
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ printf("opening BPF object file failed\n");
+ obj = NULL;
+ goto cleanup;
+ }
+
+ prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+ if (!prog) {
+ printf("finding a prog in obj file failed\n");
+ goto cleanup;
}
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 2;
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ printf("loading BPF object file failed\n");
+ goto cleanup;
}
- if (fork() == 0) {
+ map_fd[0] = bpf_object__find_map_fd_by_name(obj, "counts");
+ map_fd[1] = bpf_object__find_map_fd_by_name(obj, "stackmap");
+ if (map_fd[0] < 0 || map_fd[1] < 0) {
+ printf("finding a counts/stackmap map in obj file failed\n");
+ goto cleanup;
+ }
+
+ pid = fork();
+ if (pid == 0) {
read_trace_pipe();
return 0;
+ } else if (pid == -1) {
+ printf("couldn't spawn process\n");
+ goto cleanup;
}
+
test_bpf_perf_event();
- int_exit(0);
- return 0;
+ error = 0;
+
+cleanup:
+ bpf_object__close(obj);
+ err_exit(error);
}
diff --git a/samples/bpf/trace_output_kern.c b/samples/bpf/trace_output_kern.c
index 1d7d422cae6f..b64815af0943 100644
--- a/samples/bpf/trace_output_kern.c
+++ b/samples/bpf/trace_output_kern.c
@@ -2,15 +2,16 @@
#include <linux/version.h>
#include <uapi/linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "trace_common.h"
-struct bpf_map_def SEC("maps") my_map = {
- .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(u32),
- .max_entries = 2,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(u32));
+ __uint(max_entries, 2);
+} my_map SEC(".maps");
-SEC("kprobe/sys_write")
+SEC("kprobe/" SYSCALL(sys_write))
int bpf_prog1(struct pt_regs *ctx)
{
struct S {
diff --git a/samples/bpf/trace_output_user.c b/samples/bpf/trace_output_user.c
index 60a17dd05345..371732f9cf8e 100644
--- a/samples/bpf/trace_output_user.c
+++ b/samples/bpf/trace_output_user.c
@@ -1,23 +1,10 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <stdio.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <stdbool.h>
-#include <string.h>
#include <fcntl.h>
#include <poll.h>
-#include <linux/perf_event.h>
-#include <linux/bpf.h>
-#include <errno.h>
-#include <assert.h>
-#include <sys/syscall.h>
-#include <sys/ioctl.h>
-#include <sys/mman.h>
#include <time.h>
#include <signal.h>
#include <bpf/libbpf.h>
-#include "bpf_load.h"
-#include "perf-sys.h"
static __u64 time_get_ns(void)
{
@@ -56,21 +43,47 @@ static void print_bpf_output(void *ctx, int cpu, void *data, __u32 size)
int main(int argc, char **argv)
{
- struct perf_buffer_opts pb_opts = {};
+ struct bpf_link *link = NULL;
+ struct bpf_program *prog;
struct perf_buffer *pb;
+ struct bpf_object *obj;
+ int map_fd, ret = 0;
char filename[256];
FILE *f;
- int ret;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
}
- pb_opts.sample_cb = print_bpf_output;
- pb = perf_buffer__new(map_fd[0], 8, &pb_opts);
+ map_fd = bpf_object__find_map_fd_by_name(obj, "my_map");
+ if (map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+ if (libbpf_get_error(prog)) {
+ fprintf(stderr, "ERROR: finding a prog in obj file failed\n");
+ goto cleanup;
+ }
+
+ link = bpf_program__attach(prog);
+ if (libbpf_get_error(link)) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ link = NULL;
+ goto cleanup;
+ }
+
+ pb = perf_buffer__new(map_fd, 8, print_bpf_output, NULL, NULL, NULL);
ret = libbpf_get_error(pb);
if (ret) {
printf("failed to setup perf_buffer: %d\n", ret);
@@ -84,5 +97,9 @@ int main(int argc, char **argv)
while ((ret = perf_buffer__poll(pb, 1000)) >= 0 && cnt < MAX_CNT) {
}
kill(0, SIGINT);
+
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
return ret;
}
diff --git a/samples/bpf/tracex1_kern.c b/samples/bpf/tracex1_kern.c
index 8e2610e14475..ef30d2b353b0 100644
--- a/samples/bpf/tracex1_kern.c
+++ b/samples/bpf/tracex1_kern.c
@@ -11,7 +11,12 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;})
+#define _(P) \
+ ({ \
+ typeof(P) val = 0; \
+ bpf_probe_read_kernel(&val, sizeof(val), &(P)); \
+ val; \
+ })
/* kprobe is NOT a stable ABI
* kernel functions can be removed, renamed or completely change semantics.
@@ -21,7 +26,7 @@
SEC("kprobe/__netif_receive_skb_core")
int bpf_prog1(struct pt_regs *ctx)
{
- /* attaches to kprobe netif_receive_skb,
+ /* attaches to kprobe __netif_receive_skb_core,
* looks for packets on loobpack device and prints them
*/
char devname[IFNAMSIZ];
@@ -30,11 +35,11 @@ int bpf_prog1(struct pt_regs *ctx)
int len;
/* non-portable! works for the given kernel only */
- skb = (struct sk_buff *) PT_REGS_PARM1(ctx);
+ bpf_probe_read_kernel(&skb, sizeof(skb), (void *)PT_REGS_PARM1(ctx));
dev = _(skb->dev);
len = _(skb->len);
- bpf_probe_read(devname, sizeof(devname), dev->name);
+ bpf_probe_read_kernel(devname, sizeof(devname), dev->name);
if (devname[0] == 'l' && devname[1] == 'o') {
char fmt[] = "skb %p len %d\n";
diff --git a/samples/bpf/tracex1_user.c b/samples/bpf/tracex1_user.c
index af8c20608ab5..9d4adb7fd834 100644
--- a/samples/bpf/tracex1_user.c
+++ b/samples/bpf/tracex1_user.c
@@ -1,20 +1,41 @@
// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
-#include <linux/bpf.h>
#include <unistd.h>
-#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
+#include "trace_helpers.h"
int main(int ac, char **argv)
{
- FILE *f;
+ struct bpf_link *link = NULL;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
char filename[256];
+ FILE *f;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
+
+ prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+ if (!prog) {
+ fprintf(stderr, "ERROR: finding a prog in obj file failed\n");
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ link = bpf_program__attach(prog);
+ if (libbpf_get_error(link)) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ link = NULL;
+ goto cleanup;
}
f = popen("taskset 1 ping -c5 localhost", "r");
@@ -22,5 +43,8 @@ int main(int ac, char **argv)
read_trace_pipe();
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/tracex2_kern.c b/samples/bpf/tracex2_kern.c
index d865bb309bcb..5bc696bac27d 100644
--- a/samples/bpf/tracex2_kern.c
+++ b/samples/bpf/tracex2_kern.c
@@ -10,13 +10,14 @@
#include <uapi/linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
+#include "trace_common.h"
-struct bpf_map_def SEC("maps") my_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(long),
- .value_size = sizeof(long),
- .max_entries = 1024,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, long);
+ __type(value, long);
+ __uint(max_entries, 1024);
+} my_map SEC(".maps");
/* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe
* example will no longer be meaningful
@@ -70,14 +71,14 @@ struct hist_key {
u64 index;
};
-struct bpf_map_def SEC("maps") my_hist_map = {
- .type = BPF_MAP_TYPE_PERCPU_HASH,
- .key_size = sizeof(struct hist_key),
- .value_size = sizeof(long),
- .max_entries = 1024,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(key_size, sizeof(struct hist_key));
+ __uint(value_size, sizeof(long));
+ __uint(max_entries, 1024);
+} my_hist_map SEC(".maps");
-SEC("kprobe/sys_write")
+SEC("kprobe/" SYSCALL(sys_write))
int bpf_prog3(struct pt_regs *ctx)
{
long write_size = PT_REGS_PARM3(ctx);
diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c
index c9544a4ce61a..dd6205c6b6a7 100644
--- a/samples/bpf/tracex2_user.c
+++ b/samples/bpf/tracex2_user.c
@@ -3,17 +3,18 @@
#include <unistd.h>
#include <stdlib.h>
#include <signal.h>
-#include <linux/bpf.h>
#include <string.h>
-#include <sys/resource.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
#include "bpf_util.h"
#define MAX_INDEX 64
#define MAX_STARS 38
+/* my_map, my_hist_map */
+static int map_fd[2];
+
static void stars(char *str, long val, long max, int width)
{
int i;
@@ -114,17 +115,32 @@ static void int_exit(int sig)
int main(int ac, char **argv)
{
- struct rlimit r = {1024*1024, RLIM_INFINITY};
- char filename[256];
long key, next_key, value;
+ struct bpf_link *links[2];
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ char filename[256];
+ int i, j = 0;
FILE *f;
- int i;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK)");
- return 1;
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd[0] = bpf_object__find_map_fd_by_name(obj, "my_map");
+ map_fd[1] = bpf_object__find_map_fd_by_name(obj, "my_hist_map");
+ if (map_fd[0] < 0 || map_fd[1] < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
}
signal(SIGINT, int_exit);
@@ -138,9 +154,14 @@ int main(int ac, char **argv)
f = popen("dd if=/dev/zero of=/dev/null count=5000000", "r");
(void) f;
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ bpf_object__for_each_program(prog, obj) {
+ links[j] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[j])) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ links[j] = NULL;
+ goto cleanup;
+ }
+ j++;
}
for (i = 0; i < 5; i++) {
@@ -156,5 +177,10 @@ int main(int ac, char **argv)
}
print_hist(map_fd[1]);
+cleanup:
+ for (j--; j >= 0; j--)
+ bpf_link__destroy(links[j]);
+
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/tracex3_kern.c b/samples/bpf/tracex3_kern.c
index fe21c14feb8d..bde6591cb20c 100644
--- a/samples/bpf/tracex3_kern.c
+++ b/samples/bpf/tracex3_kern.c
@@ -11,12 +11,12 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-struct bpf_map_def SEC("maps") my_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(long),
- .value_size = sizeof(u64),
- .max_entries = 4096,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, long);
+ __type(value, u64);
+ __uint(max_entries, 4096);
+} my_map SEC(".maps");
/* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe
* example will no longer be meaningful
@@ -42,14 +42,14 @@ static unsigned int log2l(unsigned long long n)
#define SLOTS 100
-struct bpf_map_def SEC("maps") lat_map = {
- .type = BPF_MAP_TYPE_PERCPU_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u64),
- .max_entries = SLOTS,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(u64));
+ __uint(max_entries, SLOTS);
+} lat_map SEC(".maps");
-SEC("kprobe/blk_account_io_completion")
+SEC("kprobe/__blk_account_io_done")
int bpf_prog2(struct pt_regs *ctx)
{
long rq = PT_REGS_PARM1(ctx);
diff --git a/samples/bpf/tracex3_user.c b/samples/bpf/tracex3_user.c
index cf8fedc773f2..d5eebace31e6 100644
--- a/samples/bpf/tracex3_user.c
+++ b/samples/bpf/tracex3_user.c
@@ -7,11 +7,9 @@
#include <unistd.h>
#include <stdbool.h>
#include <string.h>
-#include <linux/bpf.h>
-#include <sys/resource.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
#include "bpf_util.h"
#define SLOTS 100
@@ -108,21 +106,11 @@ static void print_hist(int fd)
int main(int ac, char **argv)
{
- struct rlimit r = {1024*1024, RLIM_INFINITY};
+ struct bpf_link *links[2];
+ struct bpf_program *prog;
+ struct bpf_object *obj;
char filename[256];
- int i;
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
-
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK)");
- return 1;
- }
-
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
- }
+ int map_fd, i, j = 0;
for (i = 1; i < ac; i++) {
if (strcmp(argv[i], "-a") == 0) {
@@ -137,6 +125,35 @@ int main(int ac, char **argv)
}
}
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd = bpf_object__find_map_fd_by_name(obj, "lat_map");
+ if (map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ links[j] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[j])) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ links[j] = NULL;
+ goto cleanup;
+ }
+ j++;
+ }
+
printf(" heatmap of IO latency\n");
if (text_only)
printf(" %s", sym[num_colors - 1]);
@@ -153,9 +170,14 @@ int main(int ac, char **argv)
for (i = 0; ; i++) {
if (i % 20 == 0)
print_banner();
- print_hist(map_fd[1]);
+ print_hist(map_fd);
sleep(2);
}
+cleanup:
+ for (j--; j >= 0; j--)
+ bpf_link__destroy(links[j]);
+
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/tracex4_kern.c b/samples/bpf/tracex4_kern.c
index b1bb9df88f8e..eb0f8fdd14bf 100644
--- a/samples/bpf/tracex4_kern.c
+++ b/samples/bpf/tracex4_kern.c
@@ -15,12 +15,12 @@ struct pair {
u64 ip;
};
-struct bpf_map_def SEC("maps") my_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(long),
- .value_size = sizeof(struct pair),
- .max_entries = 1000000,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, long);
+ __type(value, struct pair);
+ __uint(max_entries, 1000000);
+} my_map SEC(".maps");
/* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe
* example will no longer be meaningful
diff --git a/samples/bpf/tracex4_user.c b/samples/bpf/tracex4_user.c
index ec52203fce39..227b05a0bc88 100644
--- a/samples/bpf/tracex4_user.c
+++ b/samples/bpf/tracex4_user.c
@@ -8,11 +8,9 @@
#include <stdbool.h>
#include <string.h>
#include <time.h>
-#include <linux/bpf.h>
-#include <sys/resource.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
struct pair {
long long val;
@@ -33,11 +31,11 @@ static void print_old_objects(int fd)
__u64 key, next_key;
struct pair v;
- key = write(1, "\e[1;1H\e[2J", 12); /* clear screen */
+ key = write(1, "\e[1;1H\e[2J", 11); /* clear screen */
key = -1;
- while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0) {
- bpf_map_lookup_elem(map_fd[0], &next_key, &v);
+ while (bpf_map_get_next_key(fd, &key, &next_key) == 0) {
+ bpf_map_lookup_elem(fd, &next_key, &v);
key = next_key;
if (val - v.val < 1000000000ll)
/* object was allocated more then 1 sec ago */
@@ -49,26 +47,50 @@ static void print_old_objects(int fd)
int main(int ac, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ struct bpf_link *links[2];
+ struct bpf_program *prog;
+ struct bpf_object *obj;
char filename[256];
- int i;
+ int map_fd, i, j = 0;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)");
- return 1;
+ map_fd = bpf_object__find_map_fd_by_name(obj, "my_map");
+ if (map_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
}
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ bpf_object__for_each_program(prog, obj) {
+ links[j] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[j])) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ links[j] = NULL;
+ goto cleanup;
+ }
+ j++;
}
for (i = 0; ; i++) {
- print_old_objects(map_fd[1]);
+ print_old_objects(map_fd);
sleep(1);
}
+cleanup:
+ for (j--; j >= 0; j--)
+ bpf_link__destroy(links[j]);
+
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/tracex5_kern.c b/samples/bpf/tracex5_kern.c
index 481790fde864..64a1f7550d7e 100644
--- a/samples/bpf/tracex5_kern.c
+++ b/samples/bpf/tracex5_kern.c
@@ -15,16 +15,16 @@
#define PROG(F) SEC("kprobe/"__stringify(F)) int bpf_func_##F
-struct bpf_map_def SEC("maps") progs = {
- .type = BPF_MAP_TYPE_PROG_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u32),
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(u32));
#ifdef __mips__
- .max_entries = 6000, /* MIPS n64 syscalls start at 5000 */
+ __uint(max_entries, 6000); /* MIPS n64 syscalls start at 5000 */
#else
- .max_entries = 1024,
+ __uint(max_entries, 1024);
#endif
-};
+} progs SEC(".maps");
SEC("kprobe/__seccomp_filter")
int bpf_prog1(struct pt_regs *ctx)
@@ -47,7 +47,7 @@ PROG(SYS__NR_write)(struct pt_regs *ctx)
{
struct seccomp_data sd;
- bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx));
+ bpf_probe_read_kernel(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx));
if (sd.args[2] == 512) {
char fmt[] = "write(fd=%d, buf=%p, size=%d)\n";
bpf_trace_printk(fmt, sizeof(fmt),
@@ -60,7 +60,7 @@ PROG(SYS__NR_read)(struct pt_regs *ctx)
{
struct seccomp_data sd;
- bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx));
+ bpf_probe_read_kernel(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx));
if (sd.args[2] > 128 && sd.args[2] <= 1024) {
char fmt[] = "read(fd=%d, buf=%p, size=%d)\n";
bpf_trace_printk(fmt, sizeof(fmt),
diff --git a/samples/bpf/tracex5_user.c b/samples/bpf/tracex5_user.c
index c4ab91c89494..9d7d79f0d47d 100644
--- a/samples/bpf/tracex5_user.c
+++ b/samples/bpf/tracex5_user.c
@@ -1,13 +1,20 @@
// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
-#include <linux/bpf.h>
+#include <stdlib.h>
#include <unistd.h>
#include <linux/filter.h>
#include <linux/seccomp.h>
#include <sys/prctl.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
-#include <sys/resource.h>
+#include <bpf/libbpf.h>
+#include "trace_helpers.h"
+#include "bpf_util.h"
+
+#ifdef __mips__
+#define MAX_ENTRIES 6000 /* MIPS n64 syscalls start at 5000 */
+#else
+#define MAX_ENTRIES 1024
+#endif
/* install fake seccomp program to enable seccomp code path inside the kernel,
* so that our kprobe attached to seccomp_phase1() can be triggered
@@ -18,7 +25,7 @@ static void install_accept_all_seccomp(void)
BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
};
struct sock_fprog prog = {
- .len = (unsigned short)(sizeof(filter)/sizeof(filter[0])),
+ .len = (unsigned short)ARRAY_SIZE(filter),
.filter = filter,
};
if (prctl(PR_SET_SECCOMP, 2, &prog))
@@ -27,16 +34,54 @@ static void install_accept_all_seccomp(void)
int main(int ac, char **argv)
{
- FILE *f;
+ struct bpf_link *link = NULL;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ int key, fd, progs_fd;
+ const char *section;
char filename[256];
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ FILE *f;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- setrlimit(RLIMIT_MEMLOCK, &r);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
+
+ prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+ if (!prog) {
+ printf("finding a prog in obj file failed\n");
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ link = bpf_program__attach(prog);
+ if (libbpf_get_error(link)) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ link = NULL;
+ goto cleanup;
+ }
+
+ progs_fd = bpf_object__find_map_fd_by_name(obj, "progs");
+ if (progs_fd < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ section = bpf_program__section_name(prog);
+ /* register only syscalls to PROG_ARRAY */
+ if (sscanf(section, "kprobe/%d", &key) != 1)
+ continue;
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ fd = bpf_program__fd(prog);
+ bpf_map_update_elem(progs_fd, &key, &fd, BPF_ANY);
}
install_accept_all_seccomp();
@@ -46,5 +91,8 @@ int main(int ac, char **argv)
read_trace_pipe();
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/tracex6_kern.c b/samples/bpf/tracex6_kern.c
index 96c234efa852..acad5712d8b4 100644
--- a/samples/bpf/tracex6_kern.c
+++ b/samples/bpf/tracex6_kern.c
@@ -3,24 +3,26 @@
#include <uapi/linux/bpf.h>
#include <bpf/bpf_helpers.h>
-struct bpf_map_def SEC("maps") counters = {
- .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(u32),
- .max_entries = 64,
-};
-struct bpf_map_def SEC("maps") values = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(int),
- .value_size = sizeof(u64),
- .max_entries = 64,
-};
-struct bpf_map_def SEC("maps") values2 = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(int),
- .value_size = sizeof(struct bpf_perf_event_value),
- .max_entries = 64,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(u32));
+ __uint(max_entries, 64);
+} counters SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, int);
+ __type(value, u64);
+ __uint(max_entries, 64);
+} values SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, int);
+ __type(value, struct bpf_perf_event_value);
+ __uint(max_entries, 64);
+} values2 SEC(".maps");
SEC("kprobe/htab_map_get_next_key")
int bpf_prog1(struct pt_regs *ctx)
diff --git a/samples/bpf/tracex6_user.c b/samples/bpf/tracex6_user.c
index 4bb3c830adb2..8e83bf2a84a4 100644
--- a/samples/bpf/tracex6_user.c
+++ b/samples/bpf/tracex6_user.c
@@ -4,23 +4,24 @@
#include <assert.h>
#include <fcntl.h>
#include <linux/perf_event.h>
-#include <linux/bpf.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
-#include <sys/resource.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
-#include "bpf_load.h"
#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
#include "perf-sys.h"
#define SAMPLE_PERIOD 0x7fffffffffffffffULL
+/* counters, values, values2 */
+static int map_fd[3];
+
static void check_on_cpu(int cpu, struct perf_event_attr *attr)
{
struct bpf_perf_event_value value2;
@@ -173,17 +174,49 @@ static void test_bpf_perf_event(void)
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ struct bpf_link *links[2];
+ struct bpf_program *prog;
+ struct bpf_object *obj;
char filename[256];
+ int i = 0;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
- setrlimit(RLIMIT_MEMLOCK, &r);
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
+
+ map_fd[0] = bpf_object__find_map_fd_by_name(obj, "counters");
+ map_fd[1] = bpf_object__find_map_fd_by_name(obj, "values");
+ map_fd[2] = bpf_object__find_map_fd_by_name(obj, "values2");
+ if (map_fd[0] < 0 || map_fd[1] < 0 || map_fd[2] < 0) {
+ fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+ goto cleanup;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ links[i] = bpf_program__attach(prog);
+ if (libbpf_get_error(links[i])) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ links[i] = NULL;
+ goto cleanup;
+ }
+ i++;
}
test_bpf_perf_event();
+
+cleanup:
+ for (i--; i >= 0; i--)
+ bpf_link__destroy(links[i]);
+
+ bpf_object__close(obj);
return 0;
}
diff --git a/samples/bpf/tracex7_user.c b/samples/bpf/tracex7_user.c
index ea6dae78f0df..8be7ce18d3ba 100644
--- a/samples/bpf/tracex7_user.c
+++ b/samples/bpf/tracex7_user.c
@@ -1,28 +1,56 @@
#define _GNU_SOURCE
#include <stdio.h>
-#include <linux/bpf.h>
#include <unistd.h>
-#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
int main(int argc, char **argv)
{
- FILE *f;
+ struct bpf_link *link = NULL;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
char filename[256];
char command[256];
- int ret;
+ int ret = 0;
+ FILE *f;
+
+ if (!argv[1]) {
+ fprintf(stderr, "ERROR: Run with the btrfs device argument!\n");
+ return 0;
+ }
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj)) {
+ fprintf(stderr, "ERROR: opening BPF object file failed\n");
+ return 0;
+ }
+
+ prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+ if (!prog) {
+ fprintf(stderr, "ERROR: finding a prog in obj file failed\n");
+ goto cleanup;
+ }
+
+ /* load BPF program */
+ if (bpf_object__load(obj)) {
+ fprintf(stderr, "ERROR: loading BPF object file failed\n");
+ goto cleanup;
+ }
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
- return 1;
+ link = bpf_program__attach(prog);
+ if (libbpf_get_error(link)) {
+ fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+ link = NULL;
+ goto cleanup;
}
snprintf(command, 256, "mount %s tmpmnt/", argv[1]);
f = popen(command, "r");
ret = pclose(f);
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
return ret ? 0 : 1;
}
diff --git a/samples/bpf/xdp1_kern.c b/samples/bpf/xdp1_kern.c
index 34b64394ed9c..0a5c704badd0 100644
--- a/samples/bpf/xdp1_kern.c
+++ b/samples/bpf/xdp1_kern.c
@@ -39,11 +39,13 @@ static int parse_ipv6(void *data, u64 nh_off, void *data_end)
return ip6h->nexthdr;
}
-SEC("xdp1")
+#define XDPBUFSIZE 64
+SEC("xdp.frags")
int xdp_prog1(struct xdp_md *ctx)
{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
+ __u8 pkt[XDPBUFSIZE] = {};
+ void *data_end = &pkt[XDPBUFSIZE-1];
+ void *data = pkt;
struct ethhdr *eth = data;
int rc = XDP_DROP;
long *value;
@@ -51,12 +53,16 @@ int xdp_prog1(struct xdp_md *ctx)
u64 nh_off;
u32 ipproto;
+ if (bpf_xdp_load_bytes(ctx, 0, pkt, sizeof(pkt)))
+ return rc;
+
nh_off = sizeof(*eth);
if (data + nh_off > data_end)
return rc;
h_proto = eth->h_proto;
+ /* Handle VLAN tagged packet */
if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
struct vlan_hdr *vhdr;
@@ -66,6 +72,7 @@ int xdp_prog1(struct xdp_md *ctx)
return rc;
h_proto = vhdr->h_vlan_encapsulated_proto;
}
+ /* Handle double VLAN tagged packet */
if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
struct vlan_hdr *vhdr;
diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c
index c447ad9e3a1d..ac370e638fa3 100644
--- a/samples/bpf/xdp1_user.c
+++ b/samples/bpf/xdp1_user.c
@@ -11,7 +11,6 @@
#include <string.h>
#include <unistd.h>
#include <libgen.h>
-#include <sys/resource.h>
#include <net/if.h>
#include "bpf_util.h"
@@ -26,12 +25,12 @@ static void int_exit(int sig)
{
__u32 curr_prog_id = 0;
- if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
- printf("bpf_get_link_xdp_id failed\n");
+ if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) {
+ printf("bpf_xdp_query_id failed\n");
exit(1);
}
if (prog_id == curr_prog_id)
- bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+ bpf_xdp_detach(ifindex, xdp_flags, NULL);
else if (!curr_prog_id)
printf("couldn't find a prog id on a given interface\n");
else
@@ -79,14 +78,11 @@ static void usage(const char *prog)
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
- struct bpf_prog_load_attr prog_load_attr = {
- .prog_type = BPF_PROG_TYPE_XDP,
- };
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
const char *optstr = "FSN";
int prog_fd, map_fd, opt;
+ struct bpf_program *prog;
struct bpf_object *obj;
struct bpf_map *map;
char filename[256];
@@ -117,11 +113,6 @@ int main(int argc, char **argv)
return 1;
}
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK)");
- return 1;
- }
-
ifindex = if_nametoindex(argv[optind]);
if (!ifindex) {
perror("if_nametoindex");
@@ -129,12 +120,20 @@ int main(int argc, char **argv)
}
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- prog_load_attr.file = filename;
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj))
+ return 1;
+
+ prog = bpf_object__next_program(obj, NULL);
+ bpf_program__set_type(prog, BPF_PROG_TYPE_XDP);
- if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
+ err = bpf_object__load(obj);
+ if (err)
return 1;
- map = bpf_map__next(NULL, obj);
+ prog_fd = bpf_program__fd(prog);
+
+ map = bpf_object__next_map(obj, NULL);
if (!map) {
printf("finding a map in obj file failed\n");
return 1;
@@ -149,7 +148,7 @@ int main(int argc, char **argv)
signal(SIGINT, int_exit);
signal(SIGTERM, int_exit);
- if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
+ if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) {
printf("link set xdp fd failed\n");
return 1;
}
@@ -161,7 +160,7 @@ int main(int argc, char **argv)
}
prog_id = info.id;
- poll_stats(map_fd, 2);
+ poll_stats(map_fd, 1);
return 0;
}
diff --git a/samples/bpf/xdp2_kern.c b/samples/bpf/xdp2_kern.c
index c787f4b49646..3332ba6bb95f 100644
--- a/samples/bpf/xdp2_kern.c
+++ b/samples/bpf/xdp2_kern.c
@@ -55,11 +55,13 @@ static int parse_ipv6(void *data, u64 nh_off, void *data_end)
return ip6h->nexthdr;
}
-SEC("xdp1")
+#define XDPBUFSIZE 64
+SEC("xdp.frags")
int xdp_prog1(struct xdp_md *ctx)
{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
+ __u8 pkt[XDPBUFSIZE] = {};
+ void *data_end = &pkt[XDPBUFSIZE-1];
+ void *data = pkt;
struct ethhdr *eth = data;
int rc = XDP_DROP;
long *value;
@@ -67,12 +69,16 @@ int xdp_prog1(struct xdp_md *ctx)
u64 nh_off;
u32 ipproto;
+ if (bpf_xdp_load_bytes(ctx, 0, pkt, sizeof(pkt)))
+ return rc;
+
nh_off = sizeof(*eth);
if (data + nh_off > data_end)
return rc;
h_proto = eth->h_proto;
+ /* Handle VLAN tagged packet */
if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
struct vlan_hdr *vhdr;
@@ -82,6 +88,7 @@ int xdp_prog1(struct xdp_md *ctx)
return rc;
h_proto = vhdr->h_vlan_encapsulated_proto;
}
+ /* Handle double VLAN tagged packet */
if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
struct vlan_hdr *vhdr;
diff --git a/samples/bpf/xdp2skb_meta_kern.c b/samples/bpf/xdp2skb_meta_kern.c
index 9b783316e860..d5631014a176 100644
--- a/samples/bpf/xdp2skb_meta_kern.c
+++ b/samples/bpf/xdp2skb_meta_kern.c
@@ -6,7 +6,7 @@
* This uses the XDP data_meta infrastructure, and is a cooperation
* between two bpf-programs (1) XDP and (2) clsact at TC-ingress hook.
*
- * Notice: This example does not use the BPF C-loader (bpf_load.c),
+ * Notice: This example does not use the BPF C-loader,
* but instead rely on the iproute2 TC tool for loading BPF-objects.
*/
#include <uapi/linux/bpf.h>
diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c
index ba482dc3da33..167646077c8f 100644
--- a/samples/bpf/xdp_adjust_tail_user.c
+++ b/samples/bpf/xdp_adjust_tail_user.c
@@ -14,7 +14,6 @@
#include <stdlib.h>
#include <string.h>
#include <net/if.h>
-#include <sys/resource.h>
#include <arpa/inet.h>
#include <netinet/ether.h>
#include <unistd.h>
@@ -34,12 +33,12 @@ static void int_exit(int sig)
__u32 curr_prog_id = 0;
if (ifindex > -1) {
- if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
- printf("bpf_get_link_xdp_id failed\n");
+ if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) {
+ printf("bpf_xdp_query_id failed\n");
exit(1);
}
if (prog_id == curr_prog_id)
- bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+ bpf_xdp_detach(ifindex, xdp_flags, NULL);
else if (!curr_prog_id)
printf("couldn't find a prog id on a given iface\n");
else
@@ -82,16 +81,13 @@ static void usage(const char *cmd)
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
- struct bpf_prog_load_attr prog_load_attr = {
- .prog_type = BPF_PROG_TYPE_XDP,
- };
unsigned char opt_flags[256] = {};
const char *optstr = "i:T:P:SNFh";
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
unsigned int kill_after_s = 0;
int i, prog_fd, map_fd, opt;
+ struct bpf_program *prog;
struct bpf_object *obj;
__u32 max_pckt_size = 0;
__u32 key = 0;
@@ -143,22 +139,26 @@ int main(int argc, char **argv)
}
}
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)");
- return 1;
- }
-
if (!ifindex) {
fprintf(stderr, "Invalid ifname\n");
return 1;
}
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- prog_load_attr.file = filename;
- if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj))
return 1;
+ prog = bpf_object__next_program(obj, NULL);
+ bpf_program__set_type(prog, BPF_PROG_TYPE_XDP);
+
+ err = bpf_object__load(obj);
+ if (err)
+ return 1;
+
+ prog_fd = bpf_program__fd(prog);
+
/* static global var 'max_pcktsz' is accessible from .data section */
if (max_pckt_size) {
map_fd = bpf_object__find_map_fd_by_name(obj, "xdp_adju.data");
@@ -179,7 +179,7 @@ int main(int argc, char **argv)
signal(SIGINT, int_exit);
signal(SIGTERM, int_exit);
- if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
+ if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) {
printf("link set xdp fd failed\n");
return 1;
}
diff --git a/samples/bpf/xdp_fwd_user.c b/samples/bpf/xdp_fwd_user.c
index 74a4583d0d86..84f57f1209ce 100644
--- a/samples/bpf/xdp_fwd_user.c
+++ b/samples/bpf/xdp_fwd_user.c
@@ -33,7 +33,7 @@ static int do_attach(int idx, int prog_fd, int map_fd, const char *name)
{
int err;
- err = bpf_set_link_xdp_fd(idx, prog_fd, xdp_flags);
+ err = bpf_xdp_attach(idx, prog_fd, xdp_flags, NULL);
if (err < 0) {
printf("ERROR: failed to attach program to %s\n", name);
return err;
@@ -47,17 +47,60 @@ static int do_attach(int idx, int prog_fd, int map_fd, const char *name)
return err;
}
-static int do_detach(int idx, const char *name)
+static int do_detach(int ifindex, const char *ifname, const char *app_name)
{
- int err;
+ LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
+ struct bpf_prog_info prog_info = {};
+ char prog_name[BPF_OBJ_NAME_LEN];
+ __u32 info_len, curr_prog_id;
+ int prog_fd;
+ int err = 1;
+
+ if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) {
+ printf("ERROR: bpf_xdp_query_id failed (%s)\n",
+ strerror(errno));
+ return err;
+ }
- err = bpf_set_link_xdp_fd(idx, -1, xdp_flags);
- if (err < 0)
- printf("ERROR: failed to detach program from %s\n", name);
+ if (!curr_prog_id) {
+ printf("ERROR: flags(0x%x) xdp prog is not attached to %s\n",
+ xdp_flags, ifname);
+ return err;
+ }
+
+ info_len = sizeof(prog_info);
+ prog_fd = bpf_prog_get_fd_by_id(curr_prog_id);
+ if (prog_fd < 0) {
+ printf("ERROR: bpf_prog_get_fd_by_id failed (%s)\n",
+ strerror(errno));
+ return prog_fd;
+ }
+ err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len);
+ if (err) {
+ printf("ERROR: bpf_obj_get_info_by_fd failed (%s)\n",
+ strerror(errno));
+ goto close_out;
+ }
+ snprintf(prog_name, sizeof(prog_name), "%s_prog", app_name);
+ prog_name[BPF_OBJ_NAME_LEN - 1] = '\0';
+
+ if (strcmp(prog_info.name, prog_name)) {
+ printf("ERROR: %s isn't attached to %s\n", app_name, ifname);
+ err = 1;
+ goto close_out;
+ }
+
+ opts.old_prog_fd = prog_fd;
+ err = bpf_xdp_detach(ifindex, xdp_flags, &opts);
+ if (err < 0)
+ printf("ERROR: failed to detach program from %s (%s)\n",
+ ifname, strerror(errno));
/* TODO: Remember to cleanup map, when adding use of shared map
* bpf_map_delete_elem((map_fd, &idx);
*/
+close_out:
+ close(prog_fd);
return err;
}
@@ -67,18 +110,19 @@ static void usage(const char *prog)
"usage: %s [OPTS] interface-list\n"
"\nOPTS:\n"
" -d detach program\n"
+ " -S use skb-mode\n"
+ " -F force loading prog\n"
" -D direct table lookups (skip fib rules)\n",
prog);
}
int main(int argc, char **argv)
{
- struct bpf_prog_load_attr prog_load_attr = {
- .prog_type = BPF_PROG_TYPE_XDP,
- };
const char *prog_name = "xdp_fwd";
- struct bpf_program *prog;
- int prog_fd, map_fd = -1;
+ struct bpf_program *prog = NULL;
+ struct bpf_program *pos;
+ const char *sec_name;
+ int prog_fd = -1, map_fd = -1;
char filename[PATH_MAX];
struct bpf_object *obj;
int opt, i, idx, err;
@@ -115,7 +159,6 @@ int main(int argc, char **argv)
if (attach) {
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- prog_load_attr.file = filename;
if (access(filename, O_RDONLY) < 0) {
printf("error accessing file %s: %s\n",
@@ -123,7 +166,14 @@ int main(int argc, char **argv)
return 1;
}
- err = bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd);
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj))
+ return 1;
+
+ prog = bpf_object__next_program(obj, NULL);
+ bpf_program__set_type(prog, BPF_PROG_TYPE_XDP);
+
+ err = bpf_object__load(obj);
if (err) {
printf("Does kernel support devmap lookup?\n");
/* If not, the error message will be:
@@ -132,7 +182,13 @@ int main(int argc, char **argv)
return 1;
}
- prog = bpf_object__find_program_by_title(obj, prog_name);
+ bpf_object__for_each_program(pos, obj) {
+ sec_name = bpf_program__section_name(pos);
+ if (sec_name && !strcmp(sec_name, prog_name)) {
+ prog = pos;
+ break;
+ }
+ }
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
printf("program not found: %s\n", strerror(prog_fd));
@@ -156,7 +212,7 @@ int main(int argc, char **argv)
return 1;
}
if (!attach) {
- err = do_detach(idx, argv[i]);
+ err = do_detach(idx, argv[i], prog_name);
if (err)
ret = err;
} else {
diff --git a/samples/bpf/xdp_monitor.bpf.c b/samples/bpf/xdp_monitor.bpf.c
new file mode 100644
index 000000000000..cfb41e2205f4
--- /dev/null
+++ b/samples/bpf/xdp_monitor.bpf.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2017-2018 Jesper Dangaard Brouer, Red Hat Inc.
+ *
+ * XDP monitor tool, based on tracepoints
+ */
+#include "xdp_sample.bpf.h"
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_monitor_kern.c b/samples/bpf/xdp_monitor_kern.c
deleted file mode 100644
index 3d33cca2d48a..000000000000
--- a/samples/bpf/xdp_monitor_kern.c
+++ /dev/null
@@ -1,257 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Copyright(c) 2017-2018 Jesper Dangaard Brouer, Red Hat Inc.
- *
- * XDP monitor tool, based on tracepoints
- */
-#include <uapi/linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-
-struct bpf_map_def SEC("maps") redirect_err_cnt = {
- .type = BPF_MAP_TYPE_PERCPU_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u64),
- .max_entries = 2,
- /* TODO: have entries for all possible errno's */
-};
-
-#define XDP_UNKNOWN XDP_REDIRECT + 1
-struct bpf_map_def SEC("maps") exception_cnt = {
- .type = BPF_MAP_TYPE_PERCPU_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(u64),
- .max_entries = XDP_UNKNOWN + 1,
-};
-
-/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format
- * Code in: kernel/include/trace/events/xdp.h
- */
-struct xdp_redirect_ctx {
- u64 __pad; // First 8 bytes are not accessible by bpf code
- int prog_id; // offset:8; size:4; signed:1;
- u32 act; // offset:12 size:4; signed:0;
- int ifindex; // offset:16 size:4; signed:1;
- int err; // offset:20 size:4; signed:1;
- int to_ifindex; // offset:24 size:4; signed:1;
- u32 map_id; // offset:28 size:4; signed:0;
- int map_index; // offset:32 size:4; signed:1;
-}; // offset:36
-
-enum {
- XDP_REDIRECT_SUCCESS = 0,
- XDP_REDIRECT_ERROR = 1
-};
-
-static __always_inline
-int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx)
-{
- u32 key = XDP_REDIRECT_ERROR;
- int err = ctx->err;
- u64 *cnt;
-
- if (!err)
- key = XDP_REDIRECT_SUCCESS;
-
- cnt = bpf_map_lookup_elem(&redirect_err_cnt, &key);
- if (!cnt)
- return 1;
- *cnt += 1;
-
- return 0; /* Indicate event was filtered (no further processing)*/
- /*
- * Returning 1 here would allow e.g. a perf-record tracepoint
- * to see and record these events, but it doesn't work well
- * in-practice as stopping perf-record also unload this
- * bpf_prog. Plus, there is additional overhead of doing so.
- */
-}
-
-SEC("tracepoint/xdp/xdp_redirect_err")
-int trace_xdp_redirect_err(struct xdp_redirect_ctx *ctx)
-{
- return xdp_redirect_collect_stat(ctx);
-}
-
-
-SEC("tracepoint/xdp/xdp_redirect_map_err")
-int trace_xdp_redirect_map_err(struct xdp_redirect_ctx *ctx)
-{
- return xdp_redirect_collect_stat(ctx);
-}
-
-/* Likely unloaded when prog starts */
-SEC("tracepoint/xdp/xdp_redirect")
-int trace_xdp_redirect(struct xdp_redirect_ctx *ctx)
-{
- return xdp_redirect_collect_stat(ctx);
-}
-
-/* Likely unloaded when prog starts */
-SEC("tracepoint/xdp/xdp_redirect_map")
-int trace_xdp_redirect_map(struct xdp_redirect_ctx *ctx)
-{
- return xdp_redirect_collect_stat(ctx);
-}
-
-/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_exception/format
- * Code in: kernel/include/trace/events/xdp.h
- */
-struct xdp_exception_ctx {
- u64 __pad; // First 8 bytes are not accessible by bpf code
- int prog_id; // offset:8; size:4; signed:1;
- u32 act; // offset:12; size:4; signed:0;
- int ifindex; // offset:16; size:4; signed:1;
-};
-
-SEC("tracepoint/xdp/xdp_exception")
-int trace_xdp_exception(struct xdp_exception_ctx *ctx)
-{
- u64 *cnt;
- u32 key;
-
- key = ctx->act;
- if (key > XDP_REDIRECT)
- key = XDP_UNKNOWN;
-
- cnt = bpf_map_lookup_elem(&exception_cnt, &key);
- if (!cnt)
- return 1;
- *cnt += 1;
-
- return 0;
-}
-
-/* Common stats data record shared with _user.c */
-struct datarec {
- u64 processed;
- u64 dropped;
- u64 info;
- u64 err;
-};
-#define MAX_CPUS 64
-
-struct bpf_map_def SEC("maps") cpumap_enqueue_cnt = {
- .type = BPF_MAP_TYPE_PERCPU_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(struct datarec),
- .max_entries = MAX_CPUS,
-};
-
-struct bpf_map_def SEC("maps") cpumap_kthread_cnt = {
- .type = BPF_MAP_TYPE_PERCPU_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(struct datarec),
- .max_entries = 1,
-};
-
-/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format
- * Code in: kernel/include/trace/events/xdp.h
- */
-struct cpumap_enqueue_ctx {
- u64 __pad; // First 8 bytes are not accessible by bpf code
- int map_id; // offset:8; size:4; signed:1;
- u32 act; // offset:12; size:4; signed:0;
- int cpu; // offset:16; size:4; signed:1;
- unsigned int drops; // offset:20; size:4; signed:0;
- unsigned int processed; // offset:24; size:4; signed:0;
- int to_cpu; // offset:28; size:4; signed:1;
-};
-
-SEC("tracepoint/xdp/xdp_cpumap_enqueue")
-int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx)
-{
- u32 to_cpu = ctx->to_cpu;
- struct datarec *rec;
-
- if (to_cpu >= MAX_CPUS)
- return 1;
-
- rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu);
- if (!rec)
- return 0;
- rec->processed += ctx->processed;
- rec->dropped += ctx->drops;
-
- /* Record bulk events, then userspace can calc average bulk size */
- if (ctx->processed > 0)
- rec->info += 1;
-
- return 0;
-}
-
-/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format
- * Code in: kernel/include/trace/events/xdp.h
- */
-struct cpumap_kthread_ctx {
- u64 __pad; // First 8 bytes are not accessible by bpf code
- int map_id; // offset:8; size:4; signed:1;
- u32 act; // offset:12; size:4; signed:0;
- int cpu; // offset:16; size:4; signed:1;
- unsigned int drops; // offset:20; size:4; signed:0;
- unsigned int processed; // offset:24; size:4; signed:0;
- int sched; // offset:28; size:4; signed:1;
-};
-
-SEC("tracepoint/xdp/xdp_cpumap_kthread")
-int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
-{
- struct datarec *rec;
- u32 key = 0;
-
- rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &key);
- if (!rec)
- return 0;
- rec->processed += ctx->processed;
- rec->dropped += ctx->drops;
-
- /* Count times kthread yielded CPU via schedule call */
- if (ctx->sched)
- rec->info++;
-
- return 0;
-}
-
-struct bpf_map_def SEC("maps") devmap_xmit_cnt = {
- .type = BPF_MAP_TYPE_PERCPU_ARRAY,
- .key_size = sizeof(u32),
- .value_size = sizeof(struct datarec),
- .max_entries = 1,
-};
-
-/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_devmap_xmit/format
- * Code in: kernel/include/trace/events/xdp.h
- */
-struct devmap_xmit_ctx {
- u64 __pad; // First 8 bytes are not accessible by bpf code
- int from_ifindex; // offset:8; size:4; signed:1;
- u32 act; // offset:12; size:4; signed:0;
- int to_ifindex; // offset:16; size:4; signed:1;
- int drops; // offset:20; size:4; signed:1;
- int sent; // offset:24; size:4; signed:1;
- int err; // offset:28; size:4; signed:1;
-};
-
-SEC("tracepoint/xdp/xdp_devmap_xmit")
-int trace_xdp_devmap_xmit(struct devmap_xmit_ctx *ctx)
-{
- struct datarec *rec;
- u32 key = 0;
-
- rec = bpf_map_lookup_elem(&devmap_xmit_cnt, &key);
- if (!rec)
- return 0;
- rec->processed += ctx->sent;
- rec->dropped += ctx->drops;
-
- /* Record bulk events, then userspace can calc average bulk size */
- rec->info += 1;
-
- /* Record error cases, where no frame were sent */
- if (ctx->err)
- rec->err++;
-
- /* Catch API error of drv ndo_xdp_xmit sent more than count */
- if (ctx->drops < 0)
- rec->err++;
-
- return 1;
-}
diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c
index dd558cbb2309..58015eb2ffae 100644
--- a/samples/bpf/xdp_monitor_user.c
+++ b/samples/bpf/xdp_monitor_user.c
@@ -1,15 +1,12 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
- */
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. */
static const char *__doc__=
- "XDP monitor tool, based on tracepoints\n"
-;
+"XDP monitor tool, based on tracepoints\n";
static const char *__doc_err_only__=
- " NOTICE: Only tracking XDP redirect errors\n"
- " Enable TX success stats via '--stats'\n"
- " (which comes with a per packet processing overhead)\n"
-;
+" NOTICE: Only tracking XDP redirect errors\n"
+" Enable redirect success stats via '-s/--stats'\n"
+" (which comes with a per packet processing overhead)\n";
#include <errno.h>
#include <stdio.h>
@@ -20,696 +17,102 @@ static const char *__doc_err_only__=
#include <ctype.h>
#include <unistd.h>
#include <locale.h>
-
-#include <sys/resource.h>
#include <getopt.h>
#include <net/if.h>
#include <time.h>
-
+#include <signal.h>
#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
#include "bpf_util.h"
+#include "xdp_sample_user.h"
+#include "xdp_monitor.skel.h"
-static int verbose = 1;
-static bool debug = false;
-
-static const struct option long_options[] = {
- {"help", no_argument, NULL, 'h' },
- {"debug", no_argument, NULL, 'D' },
- {"stats", no_argument, NULL, 'S' },
- {"sec", required_argument, NULL, 's' },
- {0, 0, NULL, 0 }
-};
-
-/* C standard specifies two constants, EXIT_SUCCESS(0) and EXIT_FAILURE(1) */
-#define EXIT_FAIL_MEM 5
-
-static void usage(char *argv[])
-{
- int i;
- printf("\nDOCUMENTATION:\n%s\n", __doc__);
- printf("\n");
- printf(" Usage: %s (options-see-below)\n",
- argv[0]);
- printf(" Listing options:\n");
- for (i = 0; long_options[i].name != 0; i++) {
- printf(" --%-15s", long_options[i].name);
- if (long_options[i].flag != NULL)
- printf(" flag (internal value:%d)",
- *long_options[i].flag);
- else
- printf("short-option: -%c",
- long_options[i].val);
- printf("\n");
- }
- printf("\n");
-}
-
-#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
-static __u64 gettime(void)
-{
- struct timespec t;
- int res;
-
- res = clock_gettime(CLOCK_MONOTONIC, &t);
- if (res < 0) {
- fprintf(stderr, "Error with gettimeofday! (%i)\n", res);
- exit(EXIT_FAILURE);
- }
- return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
-}
-
-enum {
- REDIR_SUCCESS = 0,
- REDIR_ERROR = 1,
-};
-#define REDIR_RES_MAX 2
-static const char *redir_names[REDIR_RES_MAX] = {
- [REDIR_SUCCESS] = "Success",
- [REDIR_ERROR] = "Error",
-};
-static const char *err2str(int err)
-{
- if (err < REDIR_RES_MAX)
- return redir_names[err];
- return NULL;
-}
-/* enum xdp_action */
-#define XDP_UNKNOWN XDP_REDIRECT + 1
-#define XDP_ACTION_MAX (XDP_UNKNOWN + 1)
-static const char *xdp_action_names[XDP_ACTION_MAX] = {
- [XDP_ABORTED] = "XDP_ABORTED",
- [XDP_DROP] = "XDP_DROP",
- [XDP_PASS] = "XDP_PASS",
- [XDP_TX] = "XDP_TX",
- [XDP_REDIRECT] = "XDP_REDIRECT",
- [XDP_UNKNOWN] = "XDP_UNKNOWN",
-};
-static const char *action2str(int action)
-{
- if (action < XDP_ACTION_MAX)
- return xdp_action_names[action];
- return NULL;
-}
-
-/* Common stats data record shared with _kern.c */
-struct datarec {
- __u64 processed;
- __u64 dropped;
- __u64 info;
- __u64 err;
-};
-#define MAX_CPUS 64
+static int mask = SAMPLE_REDIRECT_ERR_CNT | SAMPLE_CPUMAP_ENQUEUE_CNT |
+ SAMPLE_CPUMAP_KTHREAD_CNT | SAMPLE_EXCEPTION_CNT |
+ SAMPLE_DEVMAP_XMIT_CNT | SAMPLE_DEVMAP_XMIT_CNT_MULTI;
-/* Userspace structs for collection of stats from maps */
-struct record {
- __u64 timestamp;
- struct datarec total;
- struct datarec *cpu;
-};
-struct u64rec {
- __u64 processed;
-};
-struct record_u64 {
- /* record for _kern side __u64 values */
- __u64 timestamp;
- struct u64rec total;
- struct u64rec *cpu;
-};
+DEFINE_SAMPLE_INIT(xdp_monitor);
-struct stats_record {
- struct record_u64 xdp_redirect[REDIR_RES_MAX];
- struct record_u64 xdp_exception[XDP_ACTION_MAX];
- struct record xdp_cpumap_kthread;
- struct record xdp_cpumap_enqueue[MAX_CPUS];
- struct record xdp_devmap_xmit;
+static const struct option long_options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "stats", no_argument, NULL, 's' },
+ { "interval", required_argument, NULL, 'i' },
+ { "verbose", no_argument, NULL, 'v' },
+ {}
};
-static bool map_collect_record(int fd, __u32 key, struct record *rec)
-{
- /* For percpu maps, userspace gets a value per possible CPU */
- unsigned int nr_cpus = bpf_num_possible_cpus();
- struct datarec values[nr_cpus];
- __u64 sum_processed = 0;
- __u64 sum_dropped = 0;
- __u64 sum_info = 0;
- __u64 sum_err = 0;
- int i;
-
- if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
- fprintf(stderr,
- "ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
- return false;
- }
- /* Get time as close as possible to reading map contents */
- rec->timestamp = gettime();
-
- /* Record and sum values from each CPU */
- for (i = 0; i < nr_cpus; i++) {
- rec->cpu[i].processed = values[i].processed;
- sum_processed += values[i].processed;
- rec->cpu[i].dropped = values[i].dropped;
- sum_dropped += values[i].dropped;
- rec->cpu[i].info = values[i].info;
- sum_info += values[i].info;
- rec->cpu[i].err = values[i].err;
- sum_err += values[i].err;
- }
- rec->total.processed = sum_processed;
- rec->total.dropped = sum_dropped;
- rec->total.info = sum_info;
- rec->total.err = sum_err;
- return true;
-}
-
-static bool map_collect_record_u64(int fd, __u32 key, struct record_u64 *rec)
-{
- /* For percpu maps, userspace gets a value per possible CPU */
- unsigned int nr_cpus = bpf_num_possible_cpus();
- struct u64rec values[nr_cpus];
- __u64 sum_total = 0;
- int i;
-
- if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
- fprintf(stderr,
- "ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
- return false;
- }
- /* Get time as close as possible to reading map contents */
- rec->timestamp = gettime();
-
- /* Record and sum values from each CPU */
- for (i = 0; i < nr_cpus; i++) {
- rec->cpu[i].processed = values[i].processed;
- sum_total += values[i].processed;
- }
- rec->total.processed = sum_total;
- return true;
-}
-
-static double calc_period(struct record *r, struct record *p)
-{
- double period_ = 0;
- __u64 period = 0;
-
- period = r->timestamp - p->timestamp;
- if (period > 0)
- period_ = ((double) period / NANOSEC_PER_SEC);
-
- return period_;
-}
-
-static double calc_period_u64(struct record_u64 *r, struct record_u64 *p)
-{
- double period_ = 0;
- __u64 period = 0;
-
- period = r->timestamp - p->timestamp;
- if (period > 0)
- period_ = ((double) period / NANOSEC_PER_SEC);
-
- return period_;
-}
-
-static double calc_pps(struct datarec *r, struct datarec *p, double period)
-{
- __u64 packets = 0;
- double pps = 0;
-
- if (period > 0) {
- packets = r->processed - p->processed;
- pps = packets / period;
- }
- return pps;
-}
-
-static double calc_pps_u64(struct u64rec *r, struct u64rec *p, double period)
-{
- __u64 packets = 0;
- double pps = 0;
-
- if (period > 0) {
- packets = r->processed - p->processed;
- pps = packets / period;
- }
- return pps;
-}
-
-static double calc_drop(struct datarec *r, struct datarec *p, double period)
-{
- __u64 packets = 0;
- double pps = 0;
-
- if (period > 0) {
- packets = r->dropped - p->dropped;
- pps = packets / period;
- }
- return pps;
-}
-
-static double calc_info(struct datarec *r, struct datarec *p, double period)
-{
- __u64 packets = 0;
- double pps = 0;
-
- if (period > 0) {
- packets = r->info - p->info;
- pps = packets / period;
- }
- return pps;
-}
-
-static double calc_err(struct datarec *r, struct datarec *p, double period)
-{
- __u64 packets = 0;
- double pps = 0;
-
- if (period > 0) {
- packets = r->err - p->err;
- pps = packets / period;
- }
- return pps;
-}
-
-static void stats_print(struct stats_record *stats_rec,
- struct stats_record *stats_prev,
- bool err_only)
-{
- unsigned int nr_cpus = bpf_num_possible_cpus();
- int rec_i = 0, i, to_cpu;
- double t = 0, pps = 0;
-
- /* Header */
- printf("%-15s %-7s %-12s %-12s %-9s\n",
- "XDP-event", "CPU:to", "pps", "drop-pps", "extra-info");
-
- /* tracepoint: xdp:xdp_redirect_* */
- if (err_only)
- rec_i = REDIR_ERROR;
-
- for (; rec_i < REDIR_RES_MAX; rec_i++) {
- struct record_u64 *rec, *prev;
- char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %s\n";
- char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %s\n";
-
- rec = &stats_rec->xdp_redirect[rec_i];
- prev = &stats_prev->xdp_redirect[rec_i];
- t = calc_period_u64(rec, prev);
-
- for (i = 0; i < nr_cpus; i++) {
- struct u64rec *r = &rec->cpu[i];
- struct u64rec *p = &prev->cpu[i];
-
- pps = calc_pps_u64(r, p, t);
- if (pps > 0)
- printf(fmt1, "XDP_REDIRECT", i,
- rec_i ? 0.0: pps, rec_i ? pps : 0.0,
- err2str(rec_i));
- }
- pps = calc_pps_u64(&rec->total, &prev->total, t);
- printf(fmt2, "XDP_REDIRECT", "total",
- rec_i ? 0.0: pps, rec_i ? pps : 0.0, err2str(rec_i));
- }
-
- /* tracepoint: xdp:xdp_exception */
- for (rec_i = 0; rec_i < XDP_ACTION_MAX; rec_i++) {
- struct record_u64 *rec, *prev;
- char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %s\n";
- char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %s\n";
-
- rec = &stats_rec->xdp_exception[rec_i];
- prev = &stats_prev->xdp_exception[rec_i];
- t = calc_period_u64(rec, prev);
-
- for (i = 0; i < nr_cpus; i++) {
- struct u64rec *r = &rec->cpu[i];
- struct u64rec *p = &prev->cpu[i];
-
- pps = calc_pps_u64(r, p, t);
- if (pps > 0)
- printf(fmt1, "Exception", i,
- 0.0, pps, action2str(rec_i));
- }
- pps = calc_pps_u64(&rec->total, &prev->total, t);
- if (pps > 0)
- printf(fmt2, "Exception", "total",
- 0.0, pps, action2str(rec_i));
- }
-
- /* cpumap enqueue stats */
- for (to_cpu = 0; to_cpu < MAX_CPUS; to_cpu++) {
- char *fmt1 = "%-15s %3d:%-3d %'-12.0f %'-12.0f %'-10.2f %s\n";
- char *fmt2 = "%-15s %3s:%-3d %'-12.0f %'-12.0f %'-10.2f %s\n";
- struct record *rec, *prev;
- char *info_str = "";
- double drop, info;
-
- rec = &stats_rec->xdp_cpumap_enqueue[to_cpu];
- prev = &stats_prev->xdp_cpumap_enqueue[to_cpu];
- t = calc_period(rec, prev);
- for (i = 0; i < nr_cpus; i++) {
- struct datarec *r = &rec->cpu[i];
- struct datarec *p = &prev->cpu[i];
-
- pps = calc_pps(r, p, t);
- drop = calc_drop(r, p, t);
- info = calc_info(r, p, t);
- if (info > 0) {
- info_str = "bulk-average";
- info = pps / info; /* calc average bulk size */
- }
- if (pps > 0)
- printf(fmt1, "cpumap-enqueue",
- i, to_cpu, pps, drop, info, info_str);
- }
- pps = calc_pps(&rec->total, &prev->total, t);
- if (pps > 0) {
- drop = calc_drop(&rec->total, &prev->total, t);
- info = calc_info(&rec->total, &prev->total, t);
- if (info > 0) {
- info_str = "bulk-average";
- info = pps / info; /* calc average bulk size */
- }
- printf(fmt2, "cpumap-enqueue",
- "sum", to_cpu, pps, drop, info, info_str);
- }
- }
-
- /* cpumap kthread stats */
- {
- char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.0f %s\n";
- char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.0f %s\n";
- struct record *rec, *prev;
- double drop, info;
- char *i_str = "";
-
- rec = &stats_rec->xdp_cpumap_kthread;
- prev = &stats_prev->xdp_cpumap_kthread;
- t = calc_period(rec, prev);
- for (i = 0; i < nr_cpus; i++) {
- struct datarec *r = &rec->cpu[i];
- struct datarec *p = &prev->cpu[i];
-
- pps = calc_pps(r, p, t);
- drop = calc_drop(r, p, t);
- info = calc_info(r, p, t);
- if (info > 0)
- i_str = "sched";
- if (pps > 0 || drop > 0)
- printf(fmt1, "cpumap-kthread",
- i, pps, drop, info, i_str);
- }
- pps = calc_pps(&rec->total, &prev->total, t);
- drop = calc_drop(&rec->total, &prev->total, t);
- info = calc_info(&rec->total, &prev->total, t);
- if (info > 0)
- i_str = "sched-sum";
- printf(fmt2, "cpumap-kthread", "total", pps, drop, info, i_str);
- }
-
- /* devmap ndo_xdp_xmit stats */
- {
- char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.2f %s %s\n";
- char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.2f %s %s\n";
- struct record *rec, *prev;
- double drop, info, err;
- char *i_str = "";
- char *err_str = "";
-
- rec = &stats_rec->xdp_devmap_xmit;
- prev = &stats_prev->xdp_devmap_xmit;
- t = calc_period(rec, prev);
- for (i = 0; i < nr_cpus; i++) {
- struct datarec *r = &rec->cpu[i];
- struct datarec *p = &prev->cpu[i];
-
- pps = calc_pps(r, p, t);
- drop = calc_drop(r, p, t);
- info = calc_info(r, p, t);
- err = calc_err(r, p, t);
- if (info > 0) {
- i_str = "bulk-average";
- info = (pps+drop) / info; /* calc avg bulk */
- }
- if (err > 0)
- err_str = "drv-err";
- if (pps > 0 || drop > 0)
- printf(fmt1, "devmap-xmit",
- i, pps, drop, info, i_str, err_str);
- }
- pps = calc_pps(&rec->total, &prev->total, t);
- drop = calc_drop(&rec->total, &prev->total, t);
- info = calc_info(&rec->total, &prev->total, t);
- err = calc_err(&rec->total, &prev->total, t);
- if (info > 0) {
- i_str = "bulk-average";
- info = (pps+drop) / info; /* calc avg bulk */
- }
- if (err > 0)
- err_str = "drv-err";
- printf(fmt2, "devmap-xmit", "total", pps, drop,
- info, i_str, err_str);
- }
-
- printf("\n");
-}
-
-static bool stats_collect(struct stats_record *rec)
-{
- int fd;
- int i;
-
- /* TODO: Detect if someone unloaded the perf event_fd's, as
- * this can happen by someone running perf-record -e
- */
-
- fd = map_data[0].fd; /* map0: redirect_err_cnt */
- for (i = 0; i < REDIR_RES_MAX; i++)
- map_collect_record_u64(fd, i, &rec->xdp_redirect[i]);
-
- fd = map_data[1].fd; /* map1: exception_cnt */
- for (i = 0; i < XDP_ACTION_MAX; i++) {
- map_collect_record_u64(fd, i, &rec->xdp_exception[i]);
- }
-
- fd = map_data[2].fd; /* map2: cpumap_enqueue_cnt */
- for (i = 0; i < MAX_CPUS; i++)
- map_collect_record(fd, i, &rec->xdp_cpumap_enqueue[i]);
-
- fd = map_data[3].fd; /* map3: cpumap_kthread_cnt */
- map_collect_record(fd, 0, &rec->xdp_cpumap_kthread);
-
- fd = map_data[4].fd; /* map4: devmap_xmit_cnt */
- map_collect_record(fd, 0, &rec->xdp_devmap_xmit);
-
- return true;
-}
-
-static void *alloc_rec_per_cpu(int record_size)
-{
- unsigned int nr_cpus = bpf_num_possible_cpus();
- void *array;
- size_t size;
-
- size = record_size * nr_cpus;
- array = malloc(size);
- memset(array, 0, size);
- if (!array) {
- fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus);
- exit(EXIT_FAIL_MEM);
- }
- return array;
-}
-
-static struct stats_record *alloc_stats_record(void)
-{
- struct stats_record *rec;
- int rec_sz;
- int i;
-
- /* Alloc main stats_record structure */
- rec = malloc(sizeof(*rec));
- memset(rec, 0, sizeof(*rec));
- if (!rec) {
- fprintf(stderr, "Mem alloc error\n");
- exit(EXIT_FAIL_MEM);
- }
-
- /* Alloc stats stored per CPU for each record */
- rec_sz = sizeof(struct u64rec);
- for (i = 0; i < REDIR_RES_MAX; i++)
- rec->xdp_redirect[i].cpu = alloc_rec_per_cpu(rec_sz);
-
- for (i = 0; i < XDP_ACTION_MAX; i++)
- rec->xdp_exception[i].cpu = alloc_rec_per_cpu(rec_sz);
-
- rec_sz = sizeof(struct datarec);
- rec->xdp_cpumap_kthread.cpu = alloc_rec_per_cpu(rec_sz);
- rec->xdp_devmap_xmit.cpu = alloc_rec_per_cpu(rec_sz);
-
- for (i = 0; i < MAX_CPUS; i++)
- rec->xdp_cpumap_enqueue[i].cpu = alloc_rec_per_cpu(rec_sz);
-
- return rec;
-}
-
-static void free_stats_record(struct stats_record *r)
-{
- int i;
-
- for (i = 0; i < REDIR_RES_MAX; i++)
- free(r->xdp_redirect[i].cpu);
-
- for (i = 0; i < XDP_ACTION_MAX; i++)
- free(r->xdp_exception[i].cpu);
-
- free(r->xdp_cpumap_kthread.cpu);
- free(r->xdp_devmap_xmit.cpu);
-
- for (i = 0; i < MAX_CPUS; i++)
- free(r->xdp_cpumap_enqueue[i].cpu);
-
- free(r);
-}
-
-/* Pointer swap trick */
-static inline void swap(struct stats_record **a, struct stats_record **b)
-{
- struct stats_record *tmp;
-
- tmp = *a;
- *a = *b;
- *b = tmp;
-}
-
-static void stats_poll(int interval, bool err_only)
-{
- struct stats_record *rec, *prev;
-
- rec = alloc_stats_record();
- prev = alloc_stats_record();
- stats_collect(rec);
-
- if (err_only)
- printf("\n%s\n", __doc_err_only__);
-
- /* Trick to pretty printf with thousands separators use %' */
- setlocale(LC_NUMERIC, "en_US");
-
- /* Header */
- if (verbose)
- printf("\n%s", __doc__);
-
- /* TODO Need more advanced stats on error types */
- if (verbose) {
- printf(" - Stats map0: %s\n", map_data[0].name);
- printf(" - Stats map1: %s\n", map_data[1].name);
- printf("\n");
- }
- fflush(stdout);
-
- while (1) {
- swap(&prev, &rec);
- stats_collect(rec);
- stats_print(rec, prev, err_only);
- fflush(stdout);
- sleep(interval);
- }
-
- free_stats_record(rec);
- free_stats_record(prev);
-}
-
-static void print_bpf_prog_info(void)
-{
- int i;
-
- /* Prog info */
- printf("Loaded BPF prog have %d bpf program(s)\n", prog_cnt);
- for (i = 0; i < prog_cnt; i++) {
- printf(" - prog_fd[%d] = fd(%d)\n", i, prog_fd[i]);
- }
-
- /* Maps info */
- printf("Loaded BPF prog have %d map(s)\n", map_data_count);
- for (i = 0; i < map_data_count; i++) {
- char *name = map_data[i].name;
- int fd = map_data[i].fd;
-
- printf(" - map_data[%d] = fd(%d) name:%s\n", i, fd, name);
- }
-
- /* Event info */
- printf("Searching for (max:%d) event file descriptor(s)\n", prog_cnt);
- for (i = 0; i < prog_cnt; i++) {
- if (event_fd[i] != -1)
- printf(" - event_fd[%d] = fd(%d)\n", i, event_fd[i]);
- }
-}
-
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
- int longindex = 0, opt;
- int ret = EXIT_SUCCESS;
- char bpf_obj_file[256];
-
- /* Default settings: */
+ unsigned long interval = 2;
+ int ret = EXIT_FAIL_OPTION;
+ struct xdp_monitor *skel;
bool errors_only = true;
- int interval = 2;
-
- snprintf(bpf_obj_file, sizeof(bpf_obj_file), "%s_kern.o", argv[0]);
+ int longindex = 0, opt;
+ bool error = true;
/* Parse commands line args */
- while ((opt = getopt_long(argc, argv, "hDSs:",
+ while ((opt = getopt_long(argc, argv, "si:vh",
long_options, &longindex)) != -1) {
switch (opt) {
- case 'D':
- debug = true;
- break;
- case 'S':
+ case 's':
errors_only = false;
+ mask |= SAMPLE_REDIRECT_CNT;
break;
- case 's':
- interval = atoi(optarg);
+ case 'i':
+ interval = strtoul(optarg, NULL, 0);
+ break;
+ case 'v':
+ sample_switch_mode();
break;
case 'h':
+ error = false;
default:
- usage(argv);
- return EXIT_FAILURE;
+ sample_usage(argv, long_options, __doc__, mask, error);
+ return ret;
}
}
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK)");
- return EXIT_FAILURE;
+ skel = xdp_monitor__open();
+ if (!skel) {
+ fprintf(stderr, "Failed to xdp_monitor__open: %s\n",
+ strerror(errno));
+ ret = EXIT_FAIL_BPF;
+ goto end;
}
- if (load_bpf_file(bpf_obj_file)) {
- printf("ERROR - bpf_log_buf: %s", bpf_log_buf);
- return EXIT_FAILURE;
- }
- if (!prog_fd[0]) {
- printf("ERROR - load_bpf_file: %s\n", strerror(errno));
- return EXIT_FAILURE;
+ ret = sample_init_pre_load(skel);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
}
- if (debug) {
- print_bpf_prog_info();
+ ret = xdp_monitor__load(skel);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to xdp_monitor__load: %s\n", strerror(errno));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
}
- /* Unload/stop tracepoint event by closing fd's */
- if (errors_only) {
- /* The prog_fd[i] and event_fd[i] depend on the
- * order the functions was defined in _kern.c
- */
- close(event_fd[2]); /* tracepoint/xdp/xdp_redirect */
- close(prog_fd[2]); /* func: trace_xdp_redirect */
- close(event_fd[3]); /* tracepoint/xdp/xdp_redirect_map */
- close(prog_fd[3]); /* func: trace_xdp_redirect_map */
+ ret = sample_init(skel, mask);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
}
- stats_poll(interval, errors_only);
+ if (errors_only)
+ printf("%s", __doc_err_only__);
- return ret;
+ ret = sample_run(interval, NULL, NULL);
+ if (ret < 0) {
+ fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret));
+ ret = EXIT_FAIL;
+ goto end_destroy;
+ }
+ ret = EXIT_OK;
+end_destroy:
+ xdp_monitor__destroy(skel);
+end:
+ sample_exit(ret);
}
diff --git a/samples/bpf/xdp_redirect.bpf.c b/samples/bpf/xdp_redirect.bpf.c
new file mode 100644
index 000000000000..7c02bacfe96b
--- /dev/null
+++ b/samples/bpf/xdp_redirect.bpf.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include "vmlinux.h"
+#include "xdp_sample.bpf.h"
+#include "xdp_sample_shared.h"
+
+const volatile int ifindex_out;
+
+SEC("xdp")
+int xdp_redirect_prog(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ u32 key = bpf_get_smp_processor_id();
+ struct ethhdr *eth = data;
+ struct datarec *rec;
+ u64 nh_off;
+
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
+
+ rec = bpf_map_lookup_elem(&rx_cnt, &key);
+ if (!rec)
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
+
+ swap_src_dst_mac(data);
+ return bpf_redirect(ifindex_out, 0);
+}
+
+/* Redirect require an XDP bpf_prog loaded on the TX device */
+SEC("xdp")
+int xdp_redirect_dummy_prog(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_redirect_cpu_kern.c b/samples/bpf/xdp_redirect_cpu.bpf.c
index 313a8fe6d125..87c54bfdbb70 100644
--- a/samples/bpf/xdp_redirect_cpu_kern.c
+++ b/samples/bpf/xdp_redirect_cpu.bpf.c
@@ -2,71 +2,18 @@
*
* GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
*/
-#include <uapi/linux/if_ether.h>
-#include <uapi/linux/if_packet.h>
-#include <uapi/linux/if_vlan.h>
-#include <uapi/linux/ip.h>
-#include <uapi/linux/ipv6.h>
-#include <uapi/linux/in.h>
-#include <uapi/linux/tcp.h>
-#include <uapi/linux/udp.h>
-
-#include <uapi/linux/bpf.h>
-#include <bpf/bpf_helpers.h>
+#include "vmlinux.h"
+#include "xdp_sample.bpf.h"
+#include "xdp_sample_shared.h"
#include "hash_func01.h"
-#define MAX_CPUS 64 /* WARNING - sync with _user.c */
-
/* Special map type that can XDP_REDIRECT frames to another CPU */
struct {
__uint(type, BPF_MAP_TYPE_CPUMAP);
__uint(key_size, sizeof(u32));
- __uint(value_size, sizeof(u32));
- __uint(max_entries, MAX_CPUS);
+ __uint(value_size, sizeof(struct bpf_cpumap_val));
} cpu_map SEC(".maps");
-/* Common stats data record to keep userspace more simple */
-struct datarec {
- __u64 processed;
- __u64 dropped;
- __u64 issue;
-};
-
-/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
- * feedback. Redirect TX errors can be caught via a tracepoint.
- */
-struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, struct datarec);
- __uint(max_entries, 1);
-} rx_cnt SEC(".maps");
-
-/* Used by trace point */
-struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, struct datarec);
- __uint(max_entries, 2);
- /* TODO: have entries for all possible errno's */
-} redirect_err_cnt SEC(".maps");
-
-/* Used by trace point */
-struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, struct datarec);
- __uint(max_entries, MAX_CPUS);
-} cpumap_enqueue_cnt SEC(".maps");
-
-/* Used by trace point */
-struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, struct datarec);
- __uint(max_entries, 1);
-} cpumap_kthread_cnt SEC(".maps");
-
/* Set of maps controlling available CPU, and for iterating through
* selectable redirect CPUs.
*/
@@ -74,14 +21,15 @@ struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__type(key, u32);
__type(value, u32);
- __uint(max_entries, MAX_CPUS);
} cpus_available SEC(".maps");
+
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__type(key, u32);
__type(value, u32);
__uint(max_entries, 1);
} cpus_count SEC(".maps");
+
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__type(key, u32);
@@ -89,24 +37,16 @@ struct {
__uint(max_entries, 1);
} cpus_iterator SEC(".maps");
-/* Used by trace point */
struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, struct datarec);
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(struct bpf_devmap_val));
__uint(max_entries, 1);
-} exception_cnt SEC(".maps");
+} tx_port SEC(".maps");
-/* Helper parse functions */
+char tx_mac_addr[ETH_ALEN];
-/* Parse Ethernet layer 2, extract network layer 3 offset and protocol
- *
- * Returns false on error and non-supported ether-type
- */
-struct vlan_hdr {
- __be16 h_vlan_TCI;
- __be16 h_vlan_encapsulated_proto;
-};
+/* Helper parse functions */
static __always_inline
bool parse_eth(struct ethhdr *eth, void *data_end,
@@ -122,11 +62,12 @@ bool parse_eth(struct ethhdr *eth, void *data_end,
eth_type = eth->h_proto;
/* Skip non 802.3 Ethertypes */
- if (unlikely(ntohs(eth_type) < ETH_P_802_3_MIN))
+ if (__builtin_expect(bpf_ntohs(eth_type) < ETH_P_802_3_MIN, 0))
return false;
/* Handle VLAN tagged packet */
- if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
+ if (eth_type == bpf_htons(ETH_P_8021Q) ||
+ eth_type == bpf_htons(ETH_P_8021AD)) {
struct vlan_hdr *vlan_hdr;
vlan_hdr = (void *)eth + offset;
@@ -136,7 +77,8 @@ bool parse_eth(struct ethhdr *eth, void *data_end,
eth_type = vlan_hdr->h_vlan_encapsulated_proto;
}
/* Handle double VLAN tagged packet */
- if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
+ if (eth_type == bpf_htons(ETH_P_8021Q) ||
+ eth_type == bpf_htons(ETH_P_8021AD)) {
struct vlan_hdr *vlan_hdr;
vlan_hdr = (void *)eth + offset;
@@ -146,7 +88,7 @@ bool parse_eth(struct ethhdr *eth, void *data_end,
eth_type = vlan_hdr->h_vlan_encapsulated_proto;
}
- *eth_proto = ntohs(eth_type);
+ *eth_proto = bpf_ntohs(eth_type);
*l3_offset = offset;
return true;
}
@@ -158,7 +100,6 @@ u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off)
void *data = (void *)(long)ctx->data;
struct iphdr *iph = data + nh_off;
struct udphdr *udph;
- u16 dport;
if (iph + 1 > data_end)
return 0;
@@ -169,8 +110,7 @@ u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off)
if (udph + 1 > data_end)
return 0;
- dport = ntohs(udph->dest);
- return dport;
+ return bpf_ntohs(udph->dest);
}
static __always_inline
@@ -197,50 +137,48 @@ int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off)
return ip6h->nexthdr;
}
-SEC("xdp_cpu_map0")
+SEC("xdp")
int xdp_prognum0_no_touch(struct xdp_md *ctx)
{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
+ u32 key = bpf_get_smp_processor_id();
struct datarec *rec;
u32 *cpu_selected;
- u32 cpu_dest;
- u32 key = 0;
+ u32 cpu_dest = 0;
+ u32 key0 = 0;
/* Only use first entry in cpus_available */
- cpu_selected = bpf_map_lookup_elem(&cpus_available, &key);
+ cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0);
if (!cpu_selected)
return XDP_ABORTED;
cpu_dest = *cpu_selected;
- /* Count RX packet in map */
rec = bpf_map_lookup_elem(&rx_cnt, &key);
if (!rec)
- return XDP_ABORTED;
- rec->processed++;
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
- if (cpu_dest >= MAX_CPUS) {
- rec->issue++;
+ if (cpu_dest >= nr_cpus) {
+ NO_TEAR_INC(rec->issue);
return XDP_ABORTED;
}
-
return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
-SEC("xdp_cpu_map1_touch_data")
+SEC("xdp")
int xdp_prognum1_touch_data(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
+ u32 key = bpf_get_smp_processor_id();
struct ethhdr *eth = data;
struct datarec *rec;
u32 *cpu_selected;
- u32 cpu_dest;
+ u32 cpu_dest = 0;
+ u32 key0 = 0;
u16 eth_type;
- u32 key = 0;
/* Only use first entry in cpus_available */
- cpu_selected = bpf_map_lookup_elem(&cpus_available, &key);
+ cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0);
if (!cpu_selected)
return XDP_ABORTED;
cpu_dest = *cpu_selected;
@@ -249,36 +187,33 @@ int xdp_prognum1_touch_data(struct xdp_md *ctx)
if (eth + 1 > data_end)
return XDP_ABORTED;
- /* Count RX packet in map */
rec = bpf_map_lookup_elem(&rx_cnt, &key);
if (!rec)
- return XDP_ABORTED;
- rec->processed++;
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
/* Read packet data, and use it (drop non 802.3 Ethertypes) */
eth_type = eth->h_proto;
- if (ntohs(eth_type) < ETH_P_802_3_MIN) {
- rec->dropped++;
+ if (bpf_ntohs(eth_type) < ETH_P_802_3_MIN) {
+ NO_TEAR_INC(rec->dropped);
return XDP_DROP;
}
- if (cpu_dest >= MAX_CPUS) {
- rec->issue++;
+ if (cpu_dest >= nr_cpus) {
+ NO_TEAR_INC(rec->issue);
return XDP_ABORTED;
}
-
return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
-SEC("xdp_cpu_map2_round_robin")
+SEC("xdp")
int xdp_prognum2_round_robin(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
- struct ethhdr *eth = data;
+ u32 key = bpf_get_smp_processor_id();
struct datarec *rec;
- u32 cpu_dest;
- u32 *cpu_lookup;
+ u32 cpu_dest = 0;
u32 key0 = 0;
u32 *cpu_selected;
@@ -304,40 +239,37 @@ int xdp_prognum2_round_robin(struct xdp_md *ctx)
return XDP_ABORTED;
cpu_dest = *cpu_selected;
- /* Count RX packet in map */
- rec = bpf_map_lookup_elem(&rx_cnt, &key0);
+ rec = bpf_map_lookup_elem(&rx_cnt, &key);
if (!rec)
- return XDP_ABORTED;
- rec->processed++;
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
- if (cpu_dest >= MAX_CPUS) {
- rec->issue++;
+ if (cpu_dest >= nr_cpus) {
+ NO_TEAR_INC(rec->issue);
return XDP_ABORTED;
}
-
return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
-SEC("xdp_cpu_map3_proto_separate")
+SEC("xdp")
int xdp_prognum3_proto_separate(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
+ u32 key = bpf_get_smp_processor_id();
struct ethhdr *eth = data;
u8 ip_proto = IPPROTO_UDP;
struct datarec *rec;
u16 eth_proto = 0;
u64 l3_offset = 0;
u32 cpu_dest = 0;
- u32 cpu_idx = 0;
u32 *cpu_lookup;
- u32 key = 0;
+ u32 cpu_idx = 0;
- /* Count RX packet in map */
rec = bpf_map_lookup_elem(&rx_cnt, &key);
if (!rec)
- return XDP_ABORTED;
- rec->processed++;
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
return XDP_PASS; /* Just skip */
@@ -378,35 +310,33 @@ int xdp_prognum3_proto_separate(struct xdp_md *ctx)
return XDP_ABORTED;
cpu_dest = *cpu_lookup;
- if (cpu_dest >= MAX_CPUS) {
- rec->issue++;
+ if (cpu_dest >= nr_cpus) {
+ NO_TEAR_INC(rec->issue);
return XDP_ABORTED;
}
-
return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
-SEC("xdp_cpu_map4_ddos_filter_pktgen")
+SEC("xdp")
int xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
+ u32 key = bpf_get_smp_processor_id();
struct ethhdr *eth = data;
u8 ip_proto = IPPROTO_UDP;
struct datarec *rec;
u16 eth_proto = 0;
u64 l3_offset = 0;
u32 cpu_dest = 0;
+ u32 *cpu_lookup;
u32 cpu_idx = 0;
u16 dest_port;
- u32 *cpu_lookup;
- u32 key = 0;
- /* Count RX packet in map */
rec = bpf_map_lookup_elem(&rx_cnt, &key);
if (!rec)
- return XDP_ABORTED;
- rec->processed++;
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
return XDP_PASS; /* Just skip */
@@ -440,8 +370,7 @@ int xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
/* DDoS filter UDP port 9 (pktgen) */
dest_port = get_dest_port_ipv4_udp(ctx, l3_offset);
if (dest_port == 9) {
- if (rec)
- rec->dropped++;
+ NO_TEAR_INC(rec->dropped);
return XDP_DROP;
}
break;
@@ -454,11 +383,10 @@ int xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
return XDP_ABORTED;
cpu_dest = *cpu_lookup;
- if (cpu_dest >= MAX_CPUS) {
- rec->issue++;
+ if (cpu_dest >= nr_cpus) {
+ NO_TEAR_INC(rec->issue);
return XDP_ABORTED;
}
-
return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
@@ -493,10 +421,10 @@ u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
if (ip6h + 1 > data_end)
return 0;
- cpu_hash = ip6h->saddr.s6_addr32[0] + ip6h->daddr.s6_addr32[0];
- cpu_hash += ip6h->saddr.s6_addr32[1] + ip6h->daddr.s6_addr32[1];
- cpu_hash += ip6h->saddr.s6_addr32[2] + ip6h->daddr.s6_addr32[2];
- cpu_hash += ip6h->saddr.s6_addr32[3] + ip6h->daddr.s6_addr32[3];
+ cpu_hash = ip6h->saddr.in6_u.u6_addr32[0] + ip6h->daddr.in6_u.u6_addr32[0];
+ cpu_hash += ip6h->saddr.in6_u.u6_addr32[1] + ip6h->daddr.in6_u.u6_addr32[1];
+ cpu_hash += ip6h->saddr.in6_u.u6_addr32[2] + ip6h->daddr.in6_u.u6_addr32[2];
+ cpu_hash += ip6h->saddr.in6_u.u6_addr32[3] + ip6h->daddr.in6_u.u6_addr32[3];
cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr);
return cpu_hash;
@@ -506,30 +434,29 @@ u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
* hashing scheme is symmetric, meaning swapping IP src/dest still hit
* same CPU.
*/
-SEC("xdp_cpu_map5_lb_hash_ip_pairs")
+SEC("xdp")
int xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
+ u32 key = bpf_get_smp_processor_id();
struct ethhdr *eth = data;
- u8 ip_proto = IPPROTO_UDP;
struct datarec *rec;
u16 eth_proto = 0;
u64 l3_offset = 0;
u32 cpu_dest = 0;
u32 cpu_idx = 0;
u32 *cpu_lookup;
+ u32 key0 = 0;
u32 *cpu_max;
u32 cpu_hash;
- u32 key = 0;
- /* Count RX packet in map */
rec = bpf_map_lookup_elem(&rx_cnt, &key);
if (!rec)
- return XDP_ABORTED;
- rec->processed++;
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
- cpu_max = bpf_map_lookup_elem(&cpus_count, &key);
+ cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
if (!cpu_max)
return XDP_ABORTED;
@@ -557,165 +484,56 @@ int xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx)
return XDP_ABORTED;
cpu_dest = *cpu_lookup;
- if (cpu_dest >= MAX_CPUS) {
- rec->issue++;
+ if (cpu_dest >= nr_cpus) {
+ NO_TEAR_INC(rec->issue);
return XDP_ABORTED;
}
-
return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
-char _license[] SEC("license") = "GPL";
-
-/*** Trace point code ***/
-
-/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format
- * Code in: kernel/include/trace/events/xdp.h
- */
-struct xdp_redirect_ctx {
- u64 __pad; // First 8 bytes are not accessible by bpf code
- int prog_id; // offset:8; size:4; signed:1;
- u32 act; // offset:12 size:4; signed:0;
- int ifindex; // offset:16 size:4; signed:1;
- int err; // offset:20 size:4; signed:1;
- int to_ifindex; // offset:24 size:4; signed:1;
- u32 map_id; // offset:28 size:4; signed:0;
- int map_index; // offset:32 size:4; signed:1;
-}; // offset:36
-
-enum {
- XDP_REDIRECT_SUCCESS = 0,
- XDP_REDIRECT_ERROR = 1
-};
-
-static __always_inline
-int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx)
+SEC("xdp/cpumap")
+int xdp_redirect_cpu_devmap(struct xdp_md *ctx)
{
- u32 key = XDP_REDIRECT_ERROR;
- struct datarec *rec;
- int err = ctx->err;
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct ethhdr *eth = data;
+ u64 nh_off;
- if (!err)
- key = XDP_REDIRECT_SUCCESS;
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
- rec = bpf_map_lookup_elem(&redirect_err_cnt, &key);
- if (!rec)
- return 0;
- rec->dropped += 1;
-
- return 0; /* Indicate event was filtered (no further processing)*/
- /*
- * Returning 1 here would allow e.g. a perf-record tracepoint
- * to see and record these events, but it doesn't work well
- * in-practice as stopping perf-record also unload this
- * bpf_prog. Plus, there is additional overhead of doing so.
- */
+ swap_src_dst_mac(data);
+ return bpf_redirect_map(&tx_port, 0, 0);
}
-SEC("tracepoint/xdp/xdp_redirect_err")
-int trace_xdp_redirect_err(struct xdp_redirect_ctx *ctx)
+SEC("xdp/cpumap")
+int xdp_redirect_cpu_pass(struct xdp_md *ctx)
{
- return xdp_redirect_collect_stat(ctx);
+ return XDP_PASS;
}
-SEC("tracepoint/xdp/xdp_redirect_map_err")
-int trace_xdp_redirect_map_err(struct xdp_redirect_ctx *ctx)
+SEC("xdp/cpumap")
+int xdp_redirect_cpu_drop(struct xdp_md *ctx)
{
- return xdp_redirect_collect_stat(ctx);
+ return XDP_DROP;
}
-/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_exception/format
- * Code in: kernel/include/trace/events/xdp.h
- */
-struct xdp_exception_ctx {
- u64 __pad; // First 8 bytes are not accessible by bpf code
- int prog_id; // offset:8; size:4; signed:1;
- u32 act; // offset:12; size:4; signed:0;
- int ifindex; // offset:16; size:4; signed:1;
-};
-
-SEC("tracepoint/xdp/xdp_exception")
-int trace_xdp_exception(struct xdp_exception_ctx *ctx)
+SEC("xdp/devmap")
+int xdp_redirect_egress_prog(struct xdp_md *ctx)
{
- struct datarec *rec;
- u32 key = 0;
-
- rec = bpf_map_lookup_elem(&exception_cnt, &key);
- if (!rec)
- return 1;
- rec->dropped += 1;
-
- return 0;
-}
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct ethhdr *eth = data;
+ u64 nh_off;
-/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format
- * Code in: kernel/include/trace/events/xdp.h
- */
-struct cpumap_enqueue_ctx {
- u64 __pad; // First 8 bytes are not accessible by bpf code
- int map_id; // offset:8; size:4; signed:1;
- u32 act; // offset:12; size:4; signed:0;
- int cpu; // offset:16; size:4; signed:1;
- unsigned int drops; // offset:20; size:4; signed:0;
- unsigned int processed; // offset:24; size:4; signed:0;
- int to_cpu; // offset:28; size:4; signed:1;
-};
-
-SEC("tracepoint/xdp/xdp_cpumap_enqueue")
-int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx)
-{
- u32 to_cpu = ctx->to_cpu;
- struct datarec *rec;
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
- if (to_cpu >= MAX_CPUS)
- return 1;
+ __builtin_memcpy(eth->h_source, (const char *)tx_mac_addr, ETH_ALEN);
- rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu);
- if (!rec)
- return 0;
- rec->processed += ctx->processed;
- rec->dropped += ctx->drops;
-
- /* Record bulk events, then userspace can calc average bulk size */
- if (ctx->processed > 0)
- rec->issue += 1;
-
- /* Inception: It's possible to detect overload situations, via
- * this tracepoint. This can be used for creating a feedback
- * loop to XDP, which can take appropriate actions to mitigate
- * this overload situation.
- */
- return 0;
+ return XDP_PASS;
}
-/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format
- * Code in: kernel/include/trace/events/xdp.h
- */
-struct cpumap_kthread_ctx {
- u64 __pad; // First 8 bytes are not accessible by bpf code
- int map_id; // offset:8; size:4; signed:1;
- u32 act; // offset:12; size:4; signed:0;
- int cpu; // offset:16; size:4; signed:1;
- unsigned int drops; // offset:20; size:4; signed:0;
- unsigned int processed; // offset:24; size:4; signed:0;
- int sched; // offset:28; size:4; signed:1;
-};
-
-SEC("tracepoint/xdp/xdp_cpumap_kthread")
-int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
-{
- struct datarec *rec;
- u32 key = 0;
-
- rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &key);
- if (!rec)
- return 0;
- rec->processed += ctx->processed;
- rec->dropped += ctx->drops;
-
- /* Count times kthread yielded CPU via schedule call */
- if (ctx->sched)
- rec->issue++;
-
- return 0;
-}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c
index 15bdf047a222..a12381c37d2b 100644
--- a/samples/bpf/xdp_redirect_cpu_user.c
+++ b/samples/bpf/xdp_redirect_cpu_user.c
@@ -2,7 +2,16 @@
/* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
*/
static const char *__doc__ =
- " XDP redirect with a CPU-map type \"BPF_MAP_TYPE_CPUMAP\"";
+"XDP CPU redirect tool, using BPF_MAP_TYPE_CPUMAP\n"
+"Usage: xdp_redirect_cpu -d <IFINDEX|IFNAME> -c 0 ... -c N\n"
+"Valid specification for CPUMAP BPF program:\n"
+" --mprog-name/-e pass (use built-in XDP_PASS program)\n"
+" --mprog-name/-e drop (use built-in XDP_DROP program)\n"
+" --redirect-device/-r <ifindex|ifname> (use built-in DEVMAP redirect program)\n"
+" Custom CPUMAP BPF program:\n"
+" --mprog-filename/-f <filename> --mprog-name/-e <program>\n"
+" Optionally, also pass --redirect-map/-m and --redirect-device/-r together\n"
+" to configure DEVMAP in BPF object <filename>\n";
#include <errno.h>
#include <signal.h>
@@ -12,491 +21,70 @@ static const char *__doc__ =
#include <string.h>
#include <unistd.h>
#include <locale.h>
-#include <sys/resource.h>
+#include <sys/sysinfo.h>
#include <getopt.h>
#include <net/if.h>
#include <time.h>
#include <linux/limits.h>
-
-#define __must_check
-#include <linux/err.h>
-
#include <arpa/inet.h>
#include <linux/if_link.h>
-
-#define MAX_CPUS 64 /* WARNING - sync with _kern.c */
-
-/* How many xdp_progs are defined in _kern.c */
-#define MAX_PROG 6
-
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
-
#include "bpf_util.h"
+#include "xdp_sample_user.h"
+#include "xdp_redirect_cpu.skel.h"
-static int ifindex = -1;
-static char ifname_buf[IF_NAMESIZE];
-static char *ifname;
-static __u32 prog_id;
-
-static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
-static int cpu_map_fd;
-static int rx_cnt_map_fd;
-static int redirect_err_cnt_map_fd;
-static int cpumap_enqueue_cnt_map_fd;
-static int cpumap_kthread_cnt_map_fd;
-static int cpus_available_map_fd;
-static int cpus_count_map_fd;
-static int cpus_iterator_map_fd;
-static int exception_cnt_map_fd;
-
-#define NUM_TP 5
-struct bpf_link *tp_links[NUM_TP] = { 0 };
-static int tp_cnt = 0;
-
-/* Exit return codes */
-#define EXIT_OK 0
-#define EXIT_FAIL 1
-#define EXIT_FAIL_OPTION 2
-#define EXIT_FAIL_XDP 3
-#define EXIT_FAIL_BPF 4
-#define EXIT_FAIL_MEM 5
+static int map_fd;
+static int avail_fd;
+static int count_fd;
-static const struct option long_options[] = {
- {"help", no_argument, NULL, 'h' },
- {"dev", required_argument, NULL, 'd' },
- {"skb-mode", no_argument, NULL, 'S' },
- {"sec", required_argument, NULL, 's' },
- {"progname", required_argument, NULL, 'p' },
- {"qsize", required_argument, NULL, 'q' },
- {"cpu", required_argument, NULL, 'c' },
- {"stress-mode", no_argument, NULL, 'x' },
- {"no-separators", no_argument, NULL, 'z' },
- {"force", no_argument, NULL, 'F' },
- {0, 0, NULL, 0 }
-};
-
-static void int_exit(int sig)
-{
- __u32 curr_prog_id = 0;
+static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT |
+ SAMPLE_CPUMAP_ENQUEUE_CNT | SAMPLE_CPUMAP_KTHREAD_CNT |
+ SAMPLE_EXCEPTION_CNT;
- if (ifindex > -1) {
- if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
- printf("bpf_get_link_xdp_id failed\n");
- exit(EXIT_FAIL);
- }
- if (prog_id == curr_prog_id) {
- fprintf(stderr,
- "Interrupted: Removing XDP program on ifindex:%d device:%s\n",
- ifindex, ifname);
- bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
- } else if (!curr_prog_id) {
- printf("couldn't find a prog id on a given iface\n");
- } else {
- printf("program on interface changed, not removing\n");
- }
- }
- /* Detach tracepoints */
- while (tp_cnt)
- bpf_link__destroy(tp_links[--tp_cnt]);
+DEFINE_SAMPLE_INIT(xdp_redirect_cpu);
- exit(EXIT_OK);
-}
+static const struct option long_options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "dev", required_argument, NULL, 'd' },
+ { "skb-mode", no_argument, NULL, 'S' },
+ { "progname", required_argument, NULL, 'p' },
+ { "qsize", required_argument, NULL, 'q' },
+ { "cpu", required_argument, NULL, 'c' },
+ { "stress-mode", no_argument, NULL, 'x' },
+ { "force", no_argument, NULL, 'F' },
+ { "interval", required_argument, NULL, 'i' },
+ { "verbose", no_argument, NULL, 'v' },
+ { "stats", no_argument, NULL, 's' },
+ { "mprog-name", required_argument, NULL, 'e' },
+ { "mprog-filename", required_argument, NULL, 'f' },
+ { "redirect-device", required_argument, NULL, 'r' },
+ { "redirect-map", required_argument, NULL, 'm' },
+ {}
+};
static void print_avail_progs(struct bpf_object *obj)
{
struct bpf_program *pos;
+ printf(" Programs to be used for -p/--progname:\n");
bpf_object__for_each_program(pos, obj) {
- if (bpf_program__is_xdp(pos))
- printf(" %s\n", bpf_program__title(pos, false));
- }
-}
-
-static void usage(char *argv[], struct bpf_object *obj)
-{
- int i;
-
- printf("\nDOCUMENTATION:\n%s\n", __doc__);
- printf("\n");
- printf(" Usage: %s (options-see-below)\n", argv[0]);
- printf(" Listing options:\n");
- for (i = 0; long_options[i].name != 0; i++) {
- printf(" --%-12s", long_options[i].name);
- if (long_options[i].flag != NULL)
- printf(" flag (internal value:%d)",
- *long_options[i].flag);
- else
- printf(" short-option: -%c",
- long_options[i].val);
- printf("\n");
- }
- printf("\n Programs to be used for --progname:\n");
- print_avail_progs(obj);
- printf("\n");
-}
-
-/* gettime returns the current time of day in nanoseconds.
- * Cost: clock_gettime (ns) => 26ns (CLOCK_MONOTONIC)
- * clock_gettime (ns) => 9ns (CLOCK_MONOTONIC_COARSE)
- */
-#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
-static __u64 gettime(void)
-{
- struct timespec t;
- int res;
-
- res = clock_gettime(CLOCK_MONOTONIC, &t);
- if (res < 0) {
- fprintf(stderr, "Error with gettimeofday! (%i)\n", res);
- exit(EXIT_FAIL);
- }
- return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
-}
-
-/* Common stats data record shared with _kern.c */
-struct datarec {
- __u64 processed;
- __u64 dropped;
- __u64 issue;
-};
-struct record {
- __u64 timestamp;
- struct datarec total;
- struct datarec *cpu;
-};
-struct stats_record {
- struct record rx_cnt;
- struct record redir_err;
- struct record kthread;
- struct record exception;
- struct record enq[MAX_CPUS];
-};
-
-static bool map_collect_percpu(int fd, __u32 key, struct record *rec)
-{
- /* For percpu maps, userspace gets a value per possible CPU */
- unsigned int nr_cpus = bpf_num_possible_cpus();
- struct datarec values[nr_cpus];
- __u64 sum_processed = 0;
- __u64 sum_dropped = 0;
- __u64 sum_issue = 0;
- int i;
-
- if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
- fprintf(stderr,
- "ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
- return false;
- }
- /* Get time as close as possible to reading map contents */
- rec->timestamp = gettime();
-
- /* Record and sum values from each CPU */
- for (i = 0; i < nr_cpus; i++) {
- rec->cpu[i].processed = values[i].processed;
- sum_processed += values[i].processed;
- rec->cpu[i].dropped = values[i].dropped;
- sum_dropped += values[i].dropped;
- rec->cpu[i].issue = values[i].issue;
- sum_issue += values[i].issue;
- }
- rec->total.processed = sum_processed;
- rec->total.dropped = sum_dropped;
- rec->total.issue = sum_issue;
- return true;
-}
-
-static struct datarec *alloc_record_per_cpu(void)
-{
- unsigned int nr_cpus = bpf_num_possible_cpus();
- struct datarec *array;
- size_t size;
-
- size = sizeof(struct datarec) * nr_cpus;
- array = malloc(size);
- memset(array, 0, size);
- if (!array) {
- fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus);
- exit(EXIT_FAIL_MEM);
- }
- return array;
-}
-
-static struct stats_record *alloc_stats_record(void)
-{
- struct stats_record *rec;
- int i;
-
- rec = malloc(sizeof(*rec));
- memset(rec, 0, sizeof(*rec));
- if (!rec) {
- fprintf(stderr, "Mem alloc error\n");
- exit(EXIT_FAIL_MEM);
- }
- rec->rx_cnt.cpu = alloc_record_per_cpu();
- rec->redir_err.cpu = alloc_record_per_cpu();
- rec->kthread.cpu = alloc_record_per_cpu();
- rec->exception.cpu = alloc_record_per_cpu();
- for (i = 0; i < MAX_CPUS; i++)
- rec->enq[i].cpu = alloc_record_per_cpu();
-
- return rec;
-}
-
-static void free_stats_record(struct stats_record *r)
-{
- int i;
-
- for (i = 0; i < MAX_CPUS; i++)
- free(r->enq[i].cpu);
- free(r->exception.cpu);
- free(r->kthread.cpu);
- free(r->redir_err.cpu);
- free(r->rx_cnt.cpu);
- free(r);
-}
-
-static double calc_period(struct record *r, struct record *p)
-{
- double period_ = 0;
- __u64 period = 0;
-
- period = r->timestamp - p->timestamp;
- if (period > 0)
- period_ = ((double) period / NANOSEC_PER_SEC);
-
- return period_;
-}
-
-static __u64 calc_pps(struct datarec *r, struct datarec *p, double period_)
-{
- __u64 packets = 0;
- __u64 pps = 0;
-
- if (period_ > 0) {
- packets = r->processed - p->processed;
- pps = packets / period_;
- }
- return pps;
-}
-
-static __u64 calc_drop_pps(struct datarec *r, struct datarec *p, double period_)
-{
- __u64 packets = 0;
- __u64 pps = 0;
-
- if (period_ > 0) {
- packets = r->dropped - p->dropped;
- pps = packets / period_;
- }
- return pps;
-}
-
-static __u64 calc_errs_pps(struct datarec *r,
- struct datarec *p, double period_)
-{
- __u64 packets = 0;
- __u64 pps = 0;
-
- if (period_ > 0) {
- packets = r->issue - p->issue;
- pps = packets / period_;
- }
- return pps;
-}
-
-static void stats_print(struct stats_record *stats_rec,
- struct stats_record *stats_prev,
- char *prog_name)
-{
- unsigned int nr_cpus = bpf_num_possible_cpus();
- double pps = 0, drop = 0, err = 0;
- struct record *rec, *prev;
- int to_cpu;
- double t;
- int i;
-
- /* Header */
- printf("Running XDP/eBPF prog_name:%s\n", prog_name);
- printf("%-15s %-7s %-14s %-11s %-9s\n",
- "XDP-cpumap", "CPU:to", "pps", "drop-pps", "extra-info");
-
- /* XDP rx_cnt */
- {
- char *fmt_rx = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f %s\n";
- char *fm2_rx = "%-15s %-7s %'-14.0f %'-11.0f\n";
- char *errstr = "";
-
- rec = &stats_rec->rx_cnt;
- prev = &stats_prev->rx_cnt;
- t = calc_period(rec, prev);
- for (i = 0; i < nr_cpus; i++) {
- struct datarec *r = &rec->cpu[i];
- struct datarec *p = &prev->cpu[i];
-
- pps = calc_pps(r, p, t);
- drop = calc_drop_pps(r, p, t);
- err = calc_errs_pps(r, p, t);
- if (err > 0)
- errstr = "cpu-dest/err";
- if (pps > 0)
- printf(fmt_rx, "XDP-RX",
- i, pps, drop, err, errstr);
- }
- pps = calc_pps(&rec->total, &prev->total, t);
- drop = calc_drop_pps(&rec->total, &prev->total, t);
- err = calc_errs_pps(&rec->total, &prev->total, t);
- printf(fm2_rx, "XDP-RX", "total", pps, drop);
- }
-
- /* cpumap enqueue stats */
- for (to_cpu = 0; to_cpu < MAX_CPUS; to_cpu++) {
- char *fmt = "%-15s %3d:%-3d %'-14.0f %'-11.0f %'-10.2f %s\n";
- char *fm2 = "%-15s %3s:%-3d %'-14.0f %'-11.0f %'-10.2f %s\n";
- char *errstr = "";
-
- rec = &stats_rec->enq[to_cpu];
- prev = &stats_prev->enq[to_cpu];
- t = calc_period(rec, prev);
- for (i = 0; i < nr_cpus; i++) {
- struct datarec *r = &rec->cpu[i];
- struct datarec *p = &prev->cpu[i];
-
- pps = calc_pps(r, p, t);
- drop = calc_drop_pps(r, p, t);
- err = calc_errs_pps(r, p, t);
- if (err > 0) {
- errstr = "bulk-average";
- err = pps / err; /* calc average bulk size */
- }
- if (pps > 0)
- printf(fmt, "cpumap-enqueue",
- i, to_cpu, pps, drop, err, errstr);
- }
- pps = calc_pps(&rec->total, &prev->total, t);
- if (pps > 0) {
- drop = calc_drop_pps(&rec->total, &prev->total, t);
- err = calc_errs_pps(&rec->total, &prev->total, t);
- if (err > 0) {
- errstr = "bulk-average";
- err = pps / err; /* calc average bulk size */
- }
- printf(fm2, "cpumap-enqueue",
- "sum", to_cpu, pps, drop, err, errstr);
- }
- }
-
- /* cpumap kthread stats */
- {
- char *fmt_k = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f %s\n";
- char *fm2_k = "%-15s %-7s %'-14.0f %'-11.0f %'-10.0f %s\n";
- char *e_str = "";
-
- rec = &stats_rec->kthread;
- prev = &stats_prev->kthread;
- t = calc_period(rec, prev);
- for (i = 0; i < nr_cpus; i++) {
- struct datarec *r = &rec->cpu[i];
- struct datarec *p = &prev->cpu[i];
-
- pps = calc_pps(r, p, t);
- drop = calc_drop_pps(r, p, t);
- err = calc_errs_pps(r, p, t);
- if (err > 0)
- e_str = "sched";
- if (pps > 0)
- printf(fmt_k, "cpumap_kthread",
- i, pps, drop, err, e_str);
- }
- pps = calc_pps(&rec->total, &prev->total, t);
- drop = calc_drop_pps(&rec->total, &prev->total, t);
- err = calc_errs_pps(&rec->total, &prev->total, t);
- if (err > 0)
- e_str = "sched-sum";
- printf(fm2_k, "cpumap_kthread", "total", pps, drop, err, e_str);
- }
-
- /* XDP redirect err tracepoints (very unlikely) */
- {
- char *fmt_err = "%-15s %-7d %'-14.0f %'-11.0f\n";
- char *fm2_err = "%-15s %-7s %'-14.0f %'-11.0f\n";
-
- rec = &stats_rec->redir_err;
- prev = &stats_prev->redir_err;
- t = calc_period(rec, prev);
- for (i = 0; i < nr_cpus; i++) {
- struct datarec *r = &rec->cpu[i];
- struct datarec *p = &prev->cpu[i];
-
- pps = calc_pps(r, p, t);
- drop = calc_drop_pps(r, p, t);
- if (pps > 0)
- printf(fmt_err, "redirect_err", i, pps, drop);
+ if (bpf_program__type(pos) == BPF_PROG_TYPE_XDP) {
+ if (!strncmp(bpf_program__name(pos), "xdp_prognum",
+ sizeof("xdp_prognum") - 1))
+ printf(" %s\n", bpf_program__name(pos));
}
- pps = calc_pps(&rec->total, &prev->total, t);
- drop = calc_drop_pps(&rec->total, &prev->total, t);
- printf(fm2_err, "redirect_err", "total", pps, drop);
}
-
- /* XDP general exception tracepoints */
- {
- char *fmt_err = "%-15s %-7d %'-14.0f %'-11.0f\n";
- char *fm2_err = "%-15s %-7s %'-14.0f %'-11.0f\n";
-
- rec = &stats_rec->exception;
- prev = &stats_prev->exception;
- t = calc_period(rec, prev);
- for (i = 0; i < nr_cpus; i++) {
- struct datarec *r = &rec->cpu[i];
- struct datarec *p = &prev->cpu[i];
-
- pps = calc_pps(r, p, t);
- drop = calc_drop_pps(r, p, t);
- if (pps > 0)
- printf(fmt_err, "xdp_exception", i, pps, drop);
- }
- pps = calc_pps(&rec->total, &prev->total, t);
- drop = calc_drop_pps(&rec->total, &prev->total, t);
- printf(fm2_err, "xdp_exception", "total", pps, drop);
- }
-
- printf("\n");
- fflush(stdout);
-}
-
-static void stats_collect(struct stats_record *rec)
-{
- int fd, i;
-
- fd = rx_cnt_map_fd;
- map_collect_percpu(fd, 0, &rec->rx_cnt);
-
- fd = redirect_err_cnt_map_fd;
- map_collect_percpu(fd, 1, &rec->redir_err);
-
- fd = cpumap_enqueue_cnt_map_fd;
- for (i = 0; i < MAX_CPUS; i++)
- map_collect_percpu(fd, i, &rec->enq[i]);
-
- fd = cpumap_kthread_cnt_map_fd;
- map_collect_percpu(fd, 0, &rec->kthread);
-
- fd = exception_cnt_map_fd;
- map_collect_percpu(fd, 0, &rec->exception);
}
-
-/* Pointer swap trick */
-static inline void swap(struct stats_record **a, struct stats_record **b)
+static void usage(char *argv[], const struct option *long_options,
+ const char *doc, int mask, bool error, struct bpf_object *obj)
{
- struct stats_record *tmp;
-
- tmp = *a;
- *a = *b;
- *b = tmp;
+ sample_usage(argv, long_options, doc, mask, error);
+ print_avail_progs(obj);
}
-static int create_cpu_entry(__u32 cpu, __u32 queue_size,
+static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value,
__u32 avail_idx, bool new)
{
__u32 curr_cpus_count = 0;
@@ -506,40 +94,42 @@ static int create_cpu_entry(__u32 cpu, __u32 queue_size,
/* Add a CPU entry to cpumap, as this allocate a cpu entry in
* the kernel for the cpu.
*/
- ret = bpf_map_update_elem(cpu_map_fd, &cpu, &queue_size, 0);
- if (ret) {
- fprintf(stderr, "Create CPU entry failed (err:%d)\n", ret);
- exit(EXIT_FAIL_BPF);
+ ret = bpf_map_update_elem(map_fd, &cpu, value, 0);
+ if (ret < 0) {
+ fprintf(stderr, "Create CPU entry failed: %s\n", strerror(errno));
+ return ret;
}
/* Inform bpf_prog's that a new CPU is available to select
* from via some control maps.
*/
- ret = bpf_map_update_elem(cpus_available_map_fd, &avail_idx, &cpu, 0);
- if (ret) {
- fprintf(stderr, "Add to avail CPUs failed\n");
- exit(EXIT_FAIL_BPF);
+ ret = bpf_map_update_elem(avail_fd, &avail_idx, &cpu, 0);
+ if (ret < 0) {
+ fprintf(stderr, "Add to avail CPUs failed: %s\n", strerror(errno));
+ return ret;
}
/* When not replacing/updating existing entry, bump the count */
- ret = bpf_map_lookup_elem(cpus_count_map_fd, &key, &curr_cpus_count);
- if (ret) {
- fprintf(stderr, "Failed reading curr cpus_count\n");
- exit(EXIT_FAIL_BPF);
+ ret = bpf_map_lookup_elem(count_fd, &key, &curr_cpus_count);
+ if (ret < 0) {
+ fprintf(stderr, "Failed reading curr cpus_count: %s\n",
+ strerror(errno));
+ return ret;
}
if (new) {
curr_cpus_count++;
- ret = bpf_map_update_elem(cpus_count_map_fd, &key,
+ ret = bpf_map_update_elem(count_fd, &key,
&curr_cpus_count, 0);
- if (ret) {
- fprintf(stderr, "Failed write curr cpus_count\n");
- exit(EXIT_FAIL_BPF);
+ if (ret < 0) {
+ fprintf(stderr, "Failed write curr cpus_count: %s\n",
+ strerror(errno));
+ return ret;
}
}
- /* map_fd[7] = cpus_iterator */
- printf("%s CPU:%u as idx:%u queue_size:%d (total cpus_count:%u)\n",
- new ? "Add-new":"Replace", cpu, avail_idx,
- queue_size, curr_cpus_count);
+
+ printf("%s CPU: %u as idx: %u qsize: %d cpumap_prog_fd: %d (cpus_count: %u)\n",
+ new ? "Add new" : "Replace", cpu, avail_idx,
+ value->qsize, value->bpf_prog.fd, curr_cpus_count);
return 0;
}
@@ -547,284 +137,423 @@ static int create_cpu_entry(__u32 cpu, __u32 queue_size,
/* CPUs are zero-indexed. Thus, add a special sentinel default value
* in map cpus_available to mark CPU index'es not configured
*/
-static void mark_cpus_unavailable(void)
+static int mark_cpus_unavailable(void)
{
- __u32 invalid_cpu = MAX_CPUS;
- int ret, i;
+ int ret, i, n_cpus = libbpf_num_possible_cpus();
+ __u32 invalid_cpu = n_cpus;
- for (i = 0; i < MAX_CPUS; i++) {
- ret = bpf_map_update_elem(cpus_available_map_fd, &i,
+ for (i = 0; i < n_cpus; i++) {
+ ret = bpf_map_update_elem(avail_fd, &i,
&invalid_cpu, 0);
- if (ret) {
- fprintf(stderr, "Failed marking CPU unavailable\n");
- exit(EXIT_FAIL_BPF);
+ if (ret < 0) {
+ fprintf(stderr, "Failed marking CPU unavailable: %s\n",
+ strerror(errno));
+ return ret;
}
}
+
+ return 0;
}
/* Stress cpumap management code by concurrently changing underlying cpumap */
-static void stress_cpumap(void)
+static void stress_cpumap(void *ctx)
{
+ struct bpf_cpumap_val *value = ctx;
+
/* Changing qsize will cause kernel to free and alloc a new
* bpf_cpu_map_entry, with an associated/complicated tear-down
* procedure.
*/
- create_cpu_entry(1, 1024, 0, false);
- create_cpu_entry(1, 8, 0, false);
- create_cpu_entry(1, 16000, 0, false);
+ value->qsize = 1024;
+ create_cpu_entry(1, value, 0, false);
+ value->qsize = 8;
+ create_cpu_entry(1, value, 0, false);
+ value->qsize = 16000;
+ create_cpu_entry(1, value, 0, false);
}
-static void stats_poll(int interval, bool use_separators, char *prog_name,
- bool stress_mode)
+static int set_cpumap_prog(struct xdp_redirect_cpu *skel,
+ const char *redir_interface, const char *redir_map,
+ const char *mprog_filename, const char *mprog_name)
{
- struct stats_record *record, *prev;
-
- record = alloc_stats_record();
- prev = alloc_stats_record();
- stats_collect(record);
-
- /* Trick to pretty printf with thousands separators use %' */
- if (use_separators)
- setlocale(LC_NUMERIC, "en_US");
-
- while (1) {
- swap(&prev, &record);
- stats_collect(record);
- stats_print(record, prev, prog_name);
- sleep(interval);
- if (stress_mode)
- stress_cpumap();
- }
+ if (mprog_filename) {
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ int ret;
+
+ if (!mprog_name) {
+ fprintf(stderr, "BPF program not specified for file %s\n",
+ mprog_filename);
+ goto end;
+ }
+ if ((redir_interface && !redir_map) || (!redir_interface && redir_map)) {
+ fprintf(stderr, "--redirect-%s specified but --redirect-%s not specified\n",
+ redir_interface ? "device" : "map", redir_interface ? "map" : "device");
+ goto end;
+ }
- free_stats_record(record);
- free_stats_record(prev);
-}
+ /* Custom BPF program */
+ obj = bpf_object__open_file(mprog_filename, NULL);
+ if (!obj) {
+ ret = -errno;
+ fprintf(stderr, "Failed to bpf_prog_load_xattr: %s\n",
+ strerror(errno));
+ return ret;
+ }
-static struct bpf_link * attach_tp(struct bpf_object *obj,
- const char *tp_category,
- const char* tp_name)
-{
- struct bpf_program *prog;
- struct bpf_link *link;
- char sec_name[PATH_MAX];
- int len;
-
- len = snprintf(sec_name, PATH_MAX, "tracepoint/%s/%s",
- tp_category, tp_name);
- if (len < 0)
- exit(EXIT_FAIL);
-
- prog = bpf_object__find_program_by_title(obj, sec_name);
- if (!prog) {
- fprintf(stderr, "ERR: finding progsec: %s\n", sec_name);
- exit(EXIT_FAIL_BPF);
- }
+ ret = bpf_object__load(obj);
+ if (ret < 0) {
+ ret = -errno;
+ fprintf(stderr, "Failed to bpf_object__load: %s\n",
+ strerror(errno));
+ return ret;
+ }
- link = bpf_program__attach_tracepoint(prog, tp_category, tp_name);
- if (IS_ERR(link))
- exit(EXIT_FAIL_BPF);
+ if (redir_map) {
+ int err, redir_map_fd, ifindex_out, key = 0;
- return link;
-}
+ redir_map_fd = bpf_object__find_map_fd_by_name(obj, redir_map);
+ if (redir_map_fd < 0) {
+ fprintf(stderr, "Failed to bpf_object__find_map_fd_by_name: %s\n",
+ strerror(errno));
+ return redir_map_fd;
+ }
-static void init_tracepoints(struct bpf_object *obj) {
- tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_redirect_err");
- tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_redirect_map_err");
- tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_exception");
- tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_cpumap_enqueue");
- tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_cpumap_kthread");
-}
+ ifindex_out = if_nametoindex(redir_interface);
+ if (!ifindex_out)
+ ifindex_out = strtoul(redir_interface, NULL, 0);
+ if (!ifindex_out) {
+ fprintf(stderr, "Bad interface name or index\n");
+ return -EINVAL;
+ }
-static int init_map_fds(struct bpf_object *obj)
-{
- /* Maps updated by tracepoints */
- redirect_err_cnt_map_fd =
- bpf_object__find_map_fd_by_name(obj, "redirect_err_cnt");
- exception_cnt_map_fd =
- bpf_object__find_map_fd_by_name(obj, "exception_cnt");
- cpumap_enqueue_cnt_map_fd =
- bpf_object__find_map_fd_by_name(obj, "cpumap_enqueue_cnt");
- cpumap_kthread_cnt_map_fd =
- bpf_object__find_map_fd_by_name(obj, "cpumap_kthread_cnt");
-
- /* Maps used by XDP */
- rx_cnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rx_cnt");
- cpu_map_fd = bpf_object__find_map_fd_by_name(obj, "cpu_map");
- cpus_available_map_fd =
- bpf_object__find_map_fd_by_name(obj, "cpus_available");
- cpus_count_map_fd = bpf_object__find_map_fd_by_name(obj, "cpus_count");
- cpus_iterator_map_fd =
- bpf_object__find_map_fd_by_name(obj, "cpus_iterator");
-
- if (cpu_map_fd < 0 || rx_cnt_map_fd < 0 ||
- redirect_err_cnt_map_fd < 0 || cpumap_enqueue_cnt_map_fd < 0 ||
- cpumap_kthread_cnt_map_fd < 0 || cpus_available_map_fd < 0 ||
- cpus_count_map_fd < 0 || cpus_iterator_map_fd < 0 ||
- exception_cnt_map_fd < 0)
- return -ENOENT;
+ err = bpf_map_update_elem(redir_map_fd, &key, &ifindex_out, 0);
+ if (err < 0)
+ return err;
+ }
+ prog = bpf_object__find_program_by_name(obj, mprog_name);
+ if (!prog) {
+ ret = -errno;
+ fprintf(stderr, "Failed to bpf_object__find_program_by_name: %s\n",
+ strerror(errno));
+ return ret;
+ }
+
+ return bpf_program__fd(prog);
+ } else {
+ if (mprog_name) {
+ if (redir_interface || redir_map) {
+ fprintf(stderr, "Need to specify --mprog-filename/-f\n");
+ goto end;
+ }
+ if (!strcmp(mprog_name, "pass") || !strcmp(mprog_name, "drop")) {
+ /* Use built-in pass/drop programs */
+ return *mprog_name == 'p' ? bpf_program__fd(skel->progs.xdp_redirect_cpu_pass)
+ : bpf_program__fd(skel->progs.xdp_redirect_cpu_drop);
+ } else {
+ fprintf(stderr, "Unknown name \"%s\" for built-in BPF program\n",
+ mprog_name);
+ goto end;
+ }
+ } else {
+ if (redir_map) {
+ fprintf(stderr, "Need to specify --mprog-filename, --mprog-name and"
+ " --redirect-device with --redirect-map\n");
+ goto end;
+ }
+ if (redir_interface) {
+ /* Use built-in devmap redirect */
+ struct bpf_devmap_val val = {};
+ int ifindex_out, err;
+ __u32 key = 0;
+
+ if (!redir_interface)
+ return 0;
+
+ ifindex_out = if_nametoindex(redir_interface);
+ if (!ifindex_out)
+ ifindex_out = strtoul(redir_interface, NULL, 0);
+ if (!ifindex_out) {
+ fprintf(stderr, "Bad interface name or index\n");
+ return -EINVAL;
+ }
+
+ if (get_mac_addr(ifindex_out, skel->bss->tx_mac_addr) < 0) {
+ printf("Get interface %d mac failed\n", ifindex_out);
+ return -EINVAL;
+ }
+
+ val.ifindex = ifindex_out;
+ val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_redirect_egress_prog);
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.tx_port), &key, &val, 0);
+ if (err < 0)
+ return -errno;
+
+ return bpf_program__fd(skel->progs.xdp_redirect_cpu_devmap);
+ }
+ }
+ }
+
+ /* Disabled */
return 0;
+end:
+ fprintf(stderr, "Invalid options for CPUMAP BPF program\n");
+ return -EINVAL;
}
int main(int argc, char **argv)
{
- struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
- char *prog_name = "xdp_cpu_map5_lb_hash_ip_pairs";
- struct bpf_prog_load_attr prog_load_attr = {
- .prog_type = BPF_PROG_TYPE_UNSPEC,
- };
- struct bpf_prog_info info = {};
- __u32 info_len = sizeof(info);
- bool use_separators = true;
+ const char *redir_interface = NULL, *redir_map = NULL;
+ const char *mprog_filename = NULL, *mprog_name = NULL;
+ struct xdp_redirect_cpu *skel;
+ struct bpf_map_info info = {};
+ struct bpf_cpumap_val value;
+ __u32 infosz = sizeof(info);
+ int ret = EXIT_FAIL_OPTION;
+ unsigned long interval = 2;
bool stress_mode = false;
struct bpf_program *prog;
- struct bpf_object *obj;
- char filename[256];
+ const char *prog_name;
+ bool generic = false;
+ bool force = false;
int added_cpus = 0;
+ bool error = true;
int longindex = 0;
- int interval = 2;
int add_cpu = -1;
- int opt, err;
- int prog_fd;
+ int ifindex = -1;
+ int *cpu, i, opt;
__u32 qsize;
-
- /* Notice: choosing he queue size is very important with the
- * ixgbe driver, because it's driver page recycling trick is
- * dependend on pages being returned quickly. The number of
- * out-standing packets in the system must be less-than 2x
- * RX-ring size.
+ int n_cpus;
+
+ n_cpus = libbpf_num_possible_cpus();
+
+ /* Notice: Choosing the queue size is very important when CPU is
+ * configured with power-saving states.
+ *
+ * If deepest state take 133 usec to wakeup from (133/10^6). When link
+ * speed is 10Gbit/s ((10*10^9/8) in bytes/sec). How many bytes can
+ * arrive with in 133 usec at this speed: (10*10^9/8)*(133/10^6) =
+ * 166250 bytes. With MTU size packets this is 110 packets, and with
+ * minimum Ethernet (MAC-preamble + intergap) 84 bytes is 1979 packets.
+ *
+ * Setting default cpumap queue to 2048 as worst-case (small packet)
+ * should be +64 packet due kthread wakeup call (due to xdp_do_flush)
+ * worst-case is 2043 packets.
+ *
+ * Sysadm can configured system to avoid deep-sleep via:
+ * tuned-adm profile network-latency
*/
- qsize = 128+64;
+ qsize = 2048;
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- prog_load_attr.file = filename;
+ skel = xdp_redirect_cpu__open();
+ if (!skel) {
+ fprintf(stderr, "Failed to xdp_redirect_cpu__open: %s\n",
+ strerror(errno));
+ ret = EXIT_FAIL_BPF;
+ goto end;
+ }
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK)");
- return 1;
+ ret = sample_init_pre_load(skel);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
}
- if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
- return EXIT_FAIL;
+ if (bpf_map__set_max_entries(skel->maps.cpu_map, n_cpus) < 0) {
+ fprintf(stderr, "Failed to set max entries for cpu_map map: %s",
+ strerror(errno));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
+ }
- if (prog_fd < 0) {
- fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n",
+ if (bpf_map__set_max_entries(skel->maps.cpus_available, n_cpus) < 0) {
+ fprintf(stderr, "Failed to set max entries for cpus_available map: %s",
strerror(errno));
- return EXIT_FAIL;
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
}
- init_tracepoints(obj);
- if (init_map_fds(obj) < 0) {
- fprintf(stderr, "bpf_object__find_map_fd_by_name failed\n");
- return EXIT_FAIL;
+
+ cpu = calloc(n_cpus, sizeof(int));
+ if (!cpu) {
+ fprintf(stderr, "Failed to allocate cpu array\n");
+ goto end_destroy;
}
- mark_cpus_unavailable();
- /* Parse commands line args */
- while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzF",
+ prog = skel->progs.xdp_prognum5_lb_hash_ip_pairs;
+ while ((opt = getopt_long(argc, argv, "d:si:Sxp:f:e:r:m:c:q:Fvh",
long_options, &longindex)) != -1) {
switch (opt) {
case 'd':
if (strlen(optarg) >= IF_NAMESIZE) {
- fprintf(stderr, "ERR: --dev name too long\n");
- goto error;
+ fprintf(stderr, "-d/--dev name too long\n");
+ usage(argv, long_options, __doc__, mask, true, skel->obj);
+ goto end_cpu;
}
- ifname = (char *)&ifname_buf;
- strncpy(ifname, optarg, IF_NAMESIZE);
- ifindex = if_nametoindex(ifname);
- if (ifindex == 0) {
- fprintf(stderr,
- "ERR: --dev name unknown err(%d):%s\n",
+ ifindex = if_nametoindex(optarg);
+ if (!ifindex)
+ ifindex = strtoul(optarg, NULL, 0);
+ if (!ifindex) {
+ fprintf(stderr, "Bad interface index or name (%d): %s\n",
errno, strerror(errno));
- goto error;
+ usage(argv, long_options, __doc__, mask, true, skel->obj);
+ goto end_cpu;
}
break;
case 's':
- interval = atoi(optarg);
+ mask |= SAMPLE_REDIRECT_MAP_CNT;
+ break;
+ case 'i':
+ interval = strtoul(optarg, NULL, 0);
break;
case 'S':
- xdp_flags |= XDP_FLAGS_SKB_MODE;
+ generic = true;
break;
case 'x':
stress_mode = true;
break;
- case 'z':
- use_separators = false;
- break;
case 'p':
/* Selecting eBPF prog to load */
prog_name = optarg;
+ prog = bpf_object__find_program_by_name(skel->obj,
+ prog_name);
+ if (!prog) {
+ fprintf(stderr,
+ "Failed to find program %s specified by"
+ " option -p/--progname\n",
+ prog_name);
+ print_avail_progs(skel->obj);
+ goto end_cpu;
+ }
+ break;
+ case 'f':
+ mprog_filename = optarg;
+ break;
+ case 'e':
+ mprog_name = optarg;
+ break;
+ case 'r':
+ redir_interface = optarg;
+ mask |= SAMPLE_DEVMAP_XMIT_CNT_MULTI;
+ break;
+ case 'm':
+ redir_map = optarg;
break;
case 'c':
/* Add multiple CPUs */
add_cpu = strtoul(optarg, NULL, 0);
- if (add_cpu >= MAX_CPUS) {
+ if (add_cpu >= n_cpus) {
fprintf(stderr,
- "--cpu nr too large for cpumap err(%d):%s\n",
+ "--cpu nr too large for cpumap err (%d):%s\n",
errno, strerror(errno));
- goto error;
+ usage(argv, long_options, __doc__, mask, true, skel->obj);
+ goto end_cpu;
}
- create_cpu_entry(add_cpu, qsize, added_cpus, true);
- added_cpus++;
+ cpu[added_cpus++] = add_cpu;
break;
case 'q':
- qsize = atoi(optarg);
+ qsize = strtoul(optarg, NULL, 0);
break;
case 'F':
- xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+ force = true;
+ break;
+ case 'v':
+ sample_switch_mode();
break;
case 'h':
- error:
+ error = false;
default:
- usage(argv, obj);
- return EXIT_FAIL_OPTION;
+ usage(argv, long_options, __doc__, mask, error, skel->obj);
+ goto end_cpu;
}
}
- if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
- xdp_flags |= XDP_FLAGS_DRV_MODE;
-
- /* Required option */
+ ret = EXIT_FAIL_OPTION;
if (ifindex == -1) {
- fprintf(stderr, "ERR: required option --dev missing\n");
- usage(argv, obj);
- return EXIT_FAIL_OPTION;
+ fprintf(stderr, "Required option --dev missing\n");
+ usage(argv, long_options, __doc__, mask, true, skel->obj);
+ goto end_cpu;
}
- /* Required option */
+
if (add_cpu == -1) {
- fprintf(stderr, "ERR: required option --cpu missing\n");
- fprintf(stderr, " Specify multiple --cpu option to add more\n");
- usage(argv, obj);
- return EXIT_FAIL_OPTION;
+ fprintf(stderr, "Required option --cpu missing\n"
+ "Specify multiple --cpu option to add more\n");
+ usage(argv, long_options, __doc__, mask, true, skel->obj);
+ goto end_cpu;
}
- /* Remove XDP program when program is interrupted or killed */
- signal(SIGINT, int_exit);
- signal(SIGTERM, int_exit);
+ skel->rodata->from_match[0] = ifindex;
+ if (redir_interface)
+ skel->rodata->to_match[0] = if_nametoindex(redir_interface);
- prog = bpf_object__find_program_by_title(obj, prog_name);
- if (!prog) {
- fprintf(stderr, "bpf_object__find_program_by_title failed\n");
- return EXIT_FAIL;
+ ret = xdp_redirect_cpu__load(skel);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to xdp_redirect_cpu__load: %s\n",
+ strerror(errno));
+ goto end_cpu;
}
- prog_fd = bpf_program__fd(prog);
- if (prog_fd < 0) {
- fprintf(stderr, "bpf_program__fd failed\n");
- return EXIT_FAIL;
+ ret = bpf_obj_get_info_by_fd(bpf_map__fd(skel->maps.cpu_map), &info, &infosz);
+ if (ret < 0) {
+ fprintf(stderr, "Failed bpf_obj_get_info_by_fd for cpumap: %s\n",
+ strerror(errno));
+ goto end_cpu;
}
- if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
- fprintf(stderr, "link set xdp fd failed\n");
- return EXIT_FAIL_XDP;
+ skel->bss->cpumap_map_id = info.id;
+
+ map_fd = bpf_map__fd(skel->maps.cpu_map);
+ avail_fd = bpf_map__fd(skel->maps.cpus_available);
+ count_fd = bpf_map__fd(skel->maps.cpus_count);
+
+ ret = mark_cpus_unavailable();
+ if (ret < 0) {
+ fprintf(stderr, "Unable to mark CPUs as unavailable\n");
+ goto end_cpu;
}
- err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
- if (err) {
- printf("can't get prog info - %s\n", strerror(errno));
- return err;
+ ret = sample_init(skel, mask);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret));
+ ret = EXIT_FAIL;
+ goto end_cpu;
}
- prog_id = info.id;
- stats_poll(interval, use_separators, prog_name, stress_mode);
- return EXIT_OK;
+ value.bpf_prog.fd = set_cpumap_prog(skel, redir_interface, redir_map,
+ mprog_filename, mprog_name);
+ if (value.bpf_prog.fd < 0) {
+ fprintf(stderr, "Failed to set CPUMAP BPF program: %s\n",
+ strerror(-value.bpf_prog.fd));
+ usage(argv, long_options, __doc__, mask, true, skel->obj);
+ ret = EXIT_FAIL_BPF;
+ goto end_cpu;
+ }
+ value.qsize = qsize;
+
+ for (i = 0; i < added_cpus; i++) {
+ if (create_cpu_entry(cpu[i], &value, i, true) < 0) {
+ fprintf(stderr, "Cannot proceed, exiting\n");
+ usage(argv, long_options, __doc__, mask, true, skel->obj);
+ goto end_cpu;
+ }
+ }
+
+ ret = EXIT_FAIL_XDP;
+ if (sample_install_xdp(prog, ifindex, generic, force) < 0)
+ goto end_cpu;
+
+ ret = sample_run(interval, stress_mode ? stress_cpumap : NULL, &value);
+ if (ret < 0) {
+ fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret));
+ ret = EXIT_FAIL;
+ goto end_cpu;
+ }
+ ret = EXIT_OK;
+end_cpu:
+ free(cpu);
+end_destroy:
+ xdp_redirect_cpu__destroy(skel);
+end:
+ sample_exit(ret);
}
diff --git a/samples/bpf/xdp_redirect_kern.c b/samples/bpf/xdp_redirect_kern.c
deleted file mode 100644
index d26ec3aa215e..000000000000
--- a/samples/bpf/xdp_redirect_kern.c
+++ /dev/null
@@ -1,90 +0,0 @@
-/* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- */
-#define KBUILD_MODNAME "foo"
-#include <uapi/linux/bpf.h>
-#include <linux/in.h>
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-#include <linux/if_vlan.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <bpf/bpf_helpers.h>
-
-struct {
- __uint(type, BPF_MAP_TYPE_ARRAY);
- __type(key, int);
- __type(value, int);
- __uint(max_entries, 1);
-} tx_port SEC(".maps");
-
-/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
- * feedback. Redirect TX errors can be caught via a tracepoint.
- */
-struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, long);
- __uint(max_entries, 1);
-} rxcnt SEC(".maps");
-
-static void swap_src_dst_mac(void *data)
-{
- unsigned short *p = data;
- unsigned short dst[3];
-
- dst[0] = p[0];
- dst[1] = p[1];
- dst[2] = p[2];
- p[0] = p[3];
- p[1] = p[4];
- p[2] = p[5];
- p[3] = dst[0];
- p[4] = dst[1];
- p[5] = dst[2];
-}
-
-SEC("xdp_redirect")
-int xdp_redirect_prog(struct xdp_md *ctx)
-{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
- struct ethhdr *eth = data;
- int rc = XDP_DROP;
- int *ifindex, port = 0;
- long *value;
- u32 key = 0;
- u64 nh_off;
-
- nh_off = sizeof(*eth);
- if (data + nh_off > data_end)
- return rc;
-
- ifindex = bpf_map_lookup_elem(&tx_port, &port);
- if (!ifindex)
- return rc;
-
- value = bpf_map_lookup_elem(&rxcnt, &key);
- if (value)
- *value += 1;
-
- swap_src_dst_mac(data);
- return bpf_redirect(*ifindex, 0);
-}
-
-/* Redirect require an XDP bpf_prog loaded on the TX device */
-SEC("xdp_redirect_dummy")
-int xdp_redirect_dummy_prog(struct xdp_md *ctx)
-{
- return XDP_PASS;
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_redirect_map.bpf.c b/samples/bpf/xdp_redirect_map.bpf.c
new file mode 100644
index 000000000000..8557c278df77
--- /dev/null
+++ b/samples/bpf/xdp_redirect_map.bpf.c
@@ -0,0 +1,97 @@
+/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#define KBUILD_MODNAME "foo"
+
+#include "vmlinux.h"
+#include "xdp_sample.bpf.h"
+#include "xdp_sample_shared.h"
+
+/* The 2nd xdp prog on egress does not support skb mode, so we define two
+ * maps, tx_port_general and tx_port_native.
+ */
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+ __uint(max_entries, 1);
+} tx_port_general SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(struct bpf_devmap_val));
+ __uint(max_entries, 1);
+} tx_port_native SEC(".maps");
+
+/* store egress interface mac address */
+const volatile __u8 tx_mac_addr[ETH_ALEN];
+
+static __always_inline int xdp_redirect_map(struct xdp_md *ctx, void *redirect_map)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ u32 key = bpf_get_smp_processor_id();
+ struct ethhdr *eth = data;
+ struct datarec *rec;
+ u64 nh_off;
+
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
+
+ rec = bpf_map_lookup_elem(&rx_cnt, &key);
+ if (!rec)
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
+ swap_src_dst_mac(data);
+ return bpf_redirect_map(redirect_map, 0, 0);
+}
+
+SEC("xdp")
+int xdp_redirect_map_general(struct xdp_md *ctx)
+{
+ return xdp_redirect_map(ctx, &tx_port_general);
+}
+
+SEC("xdp")
+int xdp_redirect_map_native(struct xdp_md *ctx)
+{
+ return xdp_redirect_map(ctx, &tx_port_native);
+}
+
+SEC("xdp/devmap")
+int xdp_redirect_map_egress(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ u8 *mac_addr = (u8 *) tx_mac_addr;
+ struct ethhdr *eth = data;
+ u64 nh_off;
+
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
+
+ barrier_var(mac_addr); /* prevent optimizing out memcpy */
+ __builtin_memcpy(eth->h_source, mac_addr, ETH_ALEN);
+
+ return XDP_PASS;
+}
+
+/* Redirect require an XDP bpf_prog loaded on the TX device */
+SEC("xdp")
+int xdp_redirect_dummy_prog(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_redirect_map_kern.c b/samples/bpf/xdp_redirect_map_kern.c
deleted file mode 100644
index 6489352ab7a4..000000000000
--- a/samples/bpf/xdp_redirect_map_kern.c
+++ /dev/null
@@ -1,92 +0,0 @@
-/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- */
-#define KBUILD_MODNAME "foo"
-#include <uapi/linux/bpf.h>
-#include <linux/in.h>
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-#include <linux/if_vlan.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <bpf/bpf_helpers.h>
-
-struct {
- __uint(type, BPF_MAP_TYPE_DEVMAP);
- __uint(key_size, sizeof(int));
- __uint(value_size, sizeof(int));
- __uint(max_entries, 100);
-} tx_port SEC(".maps");
-
-/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
- * feedback. Redirect TX errors can be caught via a tracepoint.
- */
-struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, long);
- __uint(max_entries, 1);
-} rxcnt SEC(".maps");
-
-static void swap_src_dst_mac(void *data)
-{
- unsigned short *p = data;
- unsigned short dst[3];
-
- dst[0] = p[0];
- dst[1] = p[1];
- dst[2] = p[2];
- p[0] = p[3];
- p[1] = p[4];
- p[2] = p[5];
- p[3] = dst[0];
- p[4] = dst[1];
- p[5] = dst[2];
-}
-
-SEC("xdp_redirect_map")
-int xdp_redirect_map_prog(struct xdp_md *ctx)
-{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
- struct ethhdr *eth = data;
- int rc = XDP_DROP;
- int vport, port = 0, m = 0;
- long *value;
- u32 key = 0;
- u64 nh_off;
-
- nh_off = sizeof(*eth);
- if (data + nh_off > data_end)
- return rc;
-
- /* constant virtual port */
- vport = 0;
-
- /* count packet in global counter */
- value = bpf_map_lookup_elem(&rxcnt, &key);
- if (value)
- *value += 1;
-
- swap_src_dst_mac(data);
-
- /* send packet out physical port */
- return bpf_redirect_map(&tx_port, vport, 0);
-}
-
-/* Redirect require an XDP bpf_prog loaded on the TX device */
-SEC("xdp_redirect_dummy")
-int xdp_redirect_dummy_prog(struct xdp_md *ctx)
-{
- return XDP_PASS;
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_redirect_map_multi.bpf.c b/samples/bpf/xdp_redirect_map_multi.bpf.c
new file mode 100644
index 000000000000..8b2fd4ec2c76
--- /dev/null
+++ b/samples/bpf/xdp_redirect_map_multi.bpf.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+#define KBUILD_MODNAME "foo"
+
+#include "vmlinux.h"
+#include "xdp_sample.bpf.h"
+#include "xdp_sample_shared.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+ __uint(max_entries, 32);
+} forward_map_general SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(struct bpf_devmap_val));
+ __uint(max_entries, 32);
+} forward_map_native SEC(".maps");
+
+/* map to store egress interfaces mac addresses */
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, __be64);
+ __uint(max_entries, 32);
+} mac_map SEC(".maps");
+
+static int xdp_redirect_map(struct xdp_md *ctx, void *forward_map)
+{
+ u32 key = bpf_get_smp_processor_id();
+ struct datarec *rec;
+
+ rec = bpf_map_lookup_elem(&rx_cnt, &key);
+ if (!rec)
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
+
+ return bpf_redirect_map(forward_map, 0,
+ BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+}
+
+SEC("xdp")
+int xdp_redirect_map_general(struct xdp_md *ctx)
+{
+ return xdp_redirect_map(ctx, &forward_map_general);
+}
+
+SEC("xdp")
+int xdp_redirect_map_native(struct xdp_md *ctx)
+{
+ return xdp_redirect_map(ctx, &forward_map_native);
+}
+
+SEC("xdp/devmap")
+int xdp_devmap_prog(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ u32 key = ctx->egress_ifindex;
+ struct ethhdr *eth = data;
+ __be64 *mac;
+ u64 nh_off;
+
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
+
+ mac = bpf_map_lookup_elem(&mac_map, &key);
+ if (mac)
+ __builtin_memcpy(eth->h_source, mac, ETH_ALEN);
+
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_redirect_map_multi_user.c b/samples/bpf/xdp_redirect_map_multi_user.c
new file mode 100644
index 000000000000..9e24f2705b67
--- /dev/null
+++ b/samples/bpf/xdp_redirect_map_multi_user.c
@@ -0,0 +1,232 @@
+// SPDX-License-Identifier: GPL-2.0
+static const char *__doc__ =
+"XDP multi redirect tool, using BPF_MAP_TYPE_DEVMAP and BPF_F_BROADCAST flag for bpf_redirect_map\n"
+"Usage: xdp_redirect_map_multi <IFINDEX|IFNAME> <IFINDEX|IFNAME> ... <IFINDEX|IFNAME>\n";
+
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <assert.h>
+#include <getopt.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <net/if.h>
+#include <unistd.h>
+#include <libgen.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <linux/if_ether.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include "bpf_util.h"
+#include "xdp_sample_user.h"
+#include "xdp_redirect_map_multi.skel.h"
+
+#define MAX_IFACE_NUM 32
+static int ifaces[MAX_IFACE_NUM] = {};
+
+static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT |
+ SAMPLE_EXCEPTION_CNT | SAMPLE_DEVMAP_XMIT_CNT |
+ SAMPLE_DEVMAP_XMIT_CNT_MULTI | SAMPLE_SKIP_HEADING;
+
+DEFINE_SAMPLE_INIT(xdp_redirect_map_multi);
+
+static const struct option long_options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "skb-mode", no_argument, NULL, 'S' },
+ { "force", no_argument, NULL, 'F' },
+ { "load-egress", no_argument, NULL, 'X' },
+ { "stats", no_argument, NULL, 's' },
+ { "interval", required_argument, NULL, 'i' },
+ { "verbose", no_argument, NULL, 'v' },
+ {}
+};
+
+static int update_mac_map(struct bpf_map *map)
+{
+ int mac_map_fd = bpf_map__fd(map);
+ unsigned char mac_addr[6];
+ unsigned int ifindex;
+ int i, ret = -1;
+
+ for (i = 0; ifaces[i] > 0; i++) {
+ ifindex = ifaces[i];
+
+ ret = get_mac_addr(ifindex, mac_addr);
+ if (ret < 0) {
+ fprintf(stderr, "get interface %d mac failed\n",
+ ifindex);
+ return ret;
+ }
+
+ ret = bpf_map_update_elem(mac_map_fd, &ifindex, mac_addr, 0);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to update mac address for ifindex %d\n",
+ ifindex);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ struct bpf_devmap_val devmap_val = {};
+ struct xdp_redirect_map_multi *skel;
+ struct bpf_program *ingress_prog;
+ bool xdp_devmap_attached = false;
+ struct bpf_map *forward_map;
+ int ret = EXIT_FAIL_OPTION;
+ unsigned long interval = 2;
+ char ifname[IF_NAMESIZE];
+ unsigned int ifindex;
+ bool generic = false;
+ bool force = false;
+ bool tried = false;
+ bool error = true;
+ int i, opt;
+
+ while ((opt = getopt_long(argc, argv, "hSFXi:vs",
+ long_options, NULL)) != -1) {
+ switch (opt) {
+ case 'S':
+ generic = true;
+ /* devmap_xmit tracepoint not available */
+ mask &= ~(SAMPLE_DEVMAP_XMIT_CNT |
+ SAMPLE_DEVMAP_XMIT_CNT_MULTI);
+ break;
+ case 'F':
+ force = true;
+ break;
+ case 'X':
+ xdp_devmap_attached = true;
+ break;
+ case 'i':
+ interval = strtoul(optarg, NULL, 0);
+ break;
+ case 'v':
+ sample_switch_mode();
+ break;
+ case 's':
+ mask |= SAMPLE_REDIRECT_MAP_CNT;
+ break;
+ case 'h':
+ error = false;
+ default:
+ sample_usage(argv, long_options, __doc__, mask, error);
+ return ret;
+ }
+ }
+
+ if (argc <= optind + 1) {
+ sample_usage(argv, long_options, __doc__, mask, error);
+ return ret;
+ }
+
+ skel = xdp_redirect_map_multi__open();
+ if (!skel) {
+ fprintf(stderr, "Failed to xdp_redirect_map_multi__open: %s\n",
+ strerror(errno));
+ ret = EXIT_FAIL_BPF;
+ goto end;
+ }
+
+ ret = sample_init_pre_load(skel);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
+ }
+
+ ret = EXIT_FAIL_OPTION;
+ for (i = 0; i < MAX_IFACE_NUM && argv[optind + i]; i++) {
+ ifaces[i] = if_nametoindex(argv[optind + i]);
+ if (!ifaces[i])
+ ifaces[i] = strtoul(argv[optind + i], NULL, 0);
+ if (!if_indextoname(ifaces[i], ifname)) {
+ fprintf(stderr, "Bad interface index or name\n");
+ sample_usage(argv, long_options, __doc__, mask, true);
+ goto end_destroy;
+ }
+
+ skel->rodata->from_match[i] = ifaces[i];
+ skel->rodata->to_match[i] = ifaces[i];
+ }
+
+ ret = xdp_redirect_map_multi__load(skel);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to xdp_redirect_map_multi__load: %s\n",
+ strerror(errno));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
+ }
+
+ if (xdp_devmap_attached) {
+ /* Update mac_map with all egress interfaces' mac addr */
+ if (update_mac_map(skel->maps.mac_map) < 0) {
+ fprintf(stderr, "Updating mac address failed\n");
+ ret = EXIT_FAIL;
+ goto end_destroy;
+ }
+ }
+
+ ret = sample_init(skel, mask);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret));
+ ret = EXIT_FAIL;
+ goto end_destroy;
+ }
+
+ ingress_prog = skel->progs.xdp_redirect_map_native;
+ forward_map = skel->maps.forward_map_native;
+
+ for (i = 0; ifaces[i] > 0; i++) {
+ ifindex = ifaces[i];
+
+ ret = EXIT_FAIL_XDP;
+restart:
+ /* bind prog_fd to each interface */
+ if (sample_install_xdp(ingress_prog, ifindex, generic, force) < 0) {
+ if (generic && !tried) {
+ fprintf(stderr,
+ "Trying fallback to sizeof(int) as value_size for devmap in generic mode\n");
+ ingress_prog = skel->progs.xdp_redirect_map_general;
+ forward_map = skel->maps.forward_map_general;
+ tried = true;
+ goto restart;
+ }
+ goto end_destroy;
+ }
+
+ /* Add all the interfaces to forward group and attach
+ * egress devmap program if exist
+ */
+ devmap_val.ifindex = ifindex;
+ if (xdp_devmap_attached)
+ devmap_val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_devmap_prog);
+ ret = bpf_map_update_elem(bpf_map__fd(forward_map), &ifindex, &devmap_val, 0);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to update devmap value: %s\n",
+ strerror(errno));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
+ }
+ }
+
+ ret = sample_run(interval, NULL, NULL);
+ if (ret < 0) {
+ fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret));
+ ret = EXIT_FAIL;
+ goto end_destroy;
+ }
+ ret = EXIT_OK;
+end_destroy:
+ xdp_redirect_map_multi__destroy(skel);
+end:
+ sample_exit(ret);
+}
diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c
index 35e16dee613e..c889a1394dc1 100644
--- a/samples/bpf/xdp_redirect_map_user.c
+++ b/samples/bpf/xdp_redirect_map_user.c
@@ -1,6 +1,10 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
*/
+static const char *__doc__ =
+"XDP redirect tool, using BPF_MAP_TYPE_DEVMAP\n"
+"Usage: xdp_redirect_map <IFINDEX|IFNAME>_IN <IFINDEX|IFNAME>_OUT\n";
+
#include <linux/bpf.h>
#include <linux/if_link.h>
#include <assert.h>
@@ -13,131 +17,86 @@
#include <net/if.h>
#include <unistd.h>
#include <libgen.h>
-#include <sys/resource.h>
-
-#include "bpf_util.h"
+#include <getopt.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
+#include "bpf_util.h"
+#include "xdp_sample_user.h"
+#include "xdp_redirect_map.skel.h"
-static int ifindex_in;
-static int ifindex_out;
-static bool ifindex_out_xdp_dummy_attached = true;
-static __u32 prog_id;
-static __u32 dummy_prog_id;
+static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT |
+ SAMPLE_EXCEPTION_CNT | SAMPLE_DEVMAP_XMIT_CNT_MULTI;
-static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
-static int rxcnt_map_fd;
+DEFINE_SAMPLE_INIT(xdp_redirect_map);
-static void int_exit(int sig)
-{
- __u32 curr_prog_id = 0;
+static const struct option long_options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "skb-mode", no_argument, NULL, 'S' },
+ { "force", no_argument, NULL, 'F' },
+ { "load-egress", no_argument, NULL, 'X' },
+ { "stats", no_argument, NULL, 's' },
+ { "interval", required_argument, NULL, 'i' },
+ { "verbose", no_argument, NULL, 'v' },
+ {}
+};
- if (bpf_get_link_xdp_id(ifindex_in, &curr_prog_id, xdp_flags)) {
- printf("bpf_get_link_xdp_id failed\n");
- exit(1);
- }
- if (prog_id == curr_prog_id)
- bpf_set_link_xdp_fd(ifindex_in, -1, xdp_flags);
- else if (!curr_prog_id)
- printf("couldn't find a prog id on iface IN\n");
- else
- printf("program on iface IN changed, not removing\n");
-
- if (ifindex_out_xdp_dummy_attached) {
- curr_prog_id = 0;
- if (bpf_get_link_xdp_id(ifindex_out, &curr_prog_id,
- xdp_flags)) {
- printf("bpf_get_link_xdp_id failed\n");
- exit(1);
- }
- if (dummy_prog_id == curr_prog_id)
- bpf_set_link_xdp_fd(ifindex_out, -1, xdp_flags);
- else if (!curr_prog_id)
- printf("couldn't find a prog id on iface OUT\n");
- else
- printf("program on iface OUT changed, not removing\n");
- }
- exit(0);
-}
-
-static void poll_stats(int interval, int ifindex)
-{
- unsigned int nr_cpus = bpf_num_possible_cpus();
- __u64 values[nr_cpus], prev[nr_cpus];
-
- memset(prev, 0, sizeof(prev));
-
- while (1) {
- __u64 sum = 0;
- __u32 key = 0;
- int i;
-
- sleep(interval);
- assert(bpf_map_lookup_elem(rxcnt_map_fd, &key, values) == 0);
- for (i = 0; i < nr_cpus; i++)
- sum += (values[i] - prev[i]);
- if (sum)
- printf("ifindex %i: %10llu pkt/s\n",
- ifindex, sum / interval);
- memcpy(prev, values, sizeof(values));
- }
-}
-
-static void usage(const char *prog)
-{
- fprintf(stderr,
- "usage: %s [OPTS] <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n\n"
- "OPTS:\n"
- " -S use skb-mode\n"
- " -N enforce native mode\n"
- " -F force loading prog\n",
- prog);
-}
+static int verbose = 0;
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
- struct bpf_prog_load_attr prog_load_attr = {
- .prog_type = BPF_PROG_TYPE_XDP,
- };
- struct bpf_program *prog, *dummy_prog;
- struct bpf_prog_info info = {};
- __u32 info_len = sizeof(info);
- int prog_fd, dummy_prog_fd;
- const char *optstr = "FSN";
- struct bpf_object *obj;
- int ret, opt, key = 0;
- char filename[256];
- int tx_port_map_fd;
-
- while ((opt = getopt(argc, argv, optstr)) != -1) {
+ struct bpf_devmap_val devmap_val = {};
+ bool xdp_devmap_attached = false;
+ struct xdp_redirect_map *skel;
+ char str[2 * IF_NAMESIZE + 1];
+ char ifname_out[IF_NAMESIZE];
+ struct bpf_map *tx_port_map;
+ char ifname_in[IF_NAMESIZE];
+ int ifindex_in, ifindex_out;
+ unsigned long interval = 2;
+ int ret = EXIT_FAIL_OPTION;
+ struct bpf_program *prog;
+ bool generic = false;
+ bool force = false;
+ bool tried = false;
+ bool error = true;
+ int opt, key = 0;
+
+ while ((opt = getopt_long(argc, argv, "hSFXi:vs",
+ long_options, NULL)) != -1) {
switch (opt) {
case 'S':
- xdp_flags |= XDP_FLAGS_SKB_MODE;
- break;
- case 'N':
- /* default, set below */
+ generic = true;
+ /* devmap_xmit tracepoint not available */
+ mask &= ~(SAMPLE_DEVMAP_XMIT_CNT |
+ SAMPLE_DEVMAP_XMIT_CNT_MULTI);
break;
case 'F':
- xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+ force = true;
break;
+ case 'X':
+ xdp_devmap_attached = true;
+ break;
+ case 'i':
+ interval = strtoul(optarg, NULL, 0);
+ break;
+ case 'v':
+ sample_switch_mode();
+ verbose = 1;
+ break;
+ case 's':
+ mask |= SAMPLE_REDIRECT_MAP_CNT;
+ break;
+ case 'h':
+ error = false;
default:
- usage(basename(argv[0]));
- return 1;
+ sample_usage(argv, long_options, __doc__, mask, error);
+ return ret;
}
}
- if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
- xdp_flags |= XDP_FLAGS_DRV_MODE;
-
- if (optind == argc) {
- printf("usage: %s <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n", argv[0]);
- return 1;
- }
-
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK)");
- return 1;
+ if (argc <= optind + 1) {
+ sample_usage(argv, long_options, __doc__, mask, true);
+ goto end;
}
ifindex_in = if_nametoindex(argv[optind]);
@@ -148,75 +107,122 @@ int main(int argc, char **argv)
if (!ifindex_out)
ifindex_out = strtoul(argv[optind + 1], NULL, 0);
- printf("input: %d output: %d\n", ifindex_in, ifindex_out);
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- prog_load_attr.file = filename;
+ if (!ifindex_in || !ifindex_out) {
+ fprintf(stderr, "Bad interface index or name\n");
+ sample_usage(argv, long_options, __doc__, mask, true);
+ goto end;
+ }
- if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
- return 1;
+ skel = xdp_redirect_map__open();
+ if (!skel) {
+ fprintf(stderr, "Failed to xdp_redirect_map__open: %s\n",
+ strerror(errno));
+ ret = EXIT_FAIL_BPF;
+ goto end;
+ }
- prog = bpf_program__next(NULL, obj);
- dummy_prog = bpf_program__next(prog, obj);
- if (!prog || !dummy_prog) {
- printf("finding a prog in obj file failed\n");
- return 1;
+ ret = sample_init_pre_load(skel);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
}
- /* bpf_prog_load_xattr gives us the pointer to first prog's fd,
- * so we're missing only the fd for dummy prog
- */
- dummy_prog_fd = bpf_program__fd(dummy_prog);
- if (prog_fd < 0 || dummy_prog_fd < 0) {
- printf("bpf_prog_load_xattr: %s\n", strerror(errno));
- return 1;
+
+ /* Load 2nd xdp prog on egress. */
+ if (xdp_devmap_attached) {
+ ret = get_mac_addr(ifindex_out, skel->rodata->tx_mac_addr);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to get interface %d mac address: %s\n",
+ ifindex_out, strerror(-ret));
+ ret = EXIT_FAIL;
+ goto end_destroy;
+ }
+ if (verbose)
+ printf("Egress ifindex:%d using src MAC %02x:%02x:%02x:%02x:%02x:%02x\n",
+ ifindex_out,
+ skel->rodata->tx_mac_addr[0], skel->rodata->tx_mac_addr[1],
+ skel->rodata->tx_mac_addr[2], skel->rodata->tx_mac_addr[3],
+ skel->rodata->tx_mac_addr[4], skel->rodata->tx_mac_addr[5]);
}
- tx_port_map_fd = bpf_object__find_map_fd_by_name(obj, "tx_port");
- rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt");
- if (tx_port_map_fd < 0 || rxcnt_map_fd < 0) {
- printf("bpf_object__find_map_fd_by_name failed\n");
- return 1;
+ skel->rodata->from_match[0] = ifindex_in;
+ skel->rodata->to_match[0] = ifindex_out;
+
+ ret = xdp_redirect_map__load(skel);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to xdp_redirect_map__load: %s\n",
+ strerror(errno));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
}
- if (bpf_set_link_xdp_fd(ifindex_in, prog_fd, xdp_flags) < 0) {
- printf("ERROR: link set xdp fd failed on %d\n", ifindex_in);
- return 1;
+ ret = sample_init(skel, mask);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret));
+ ret = EXIT_FAIL;
+ goto end_destroy;
}
- ret = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
- if (ret) {
- printf("can't get prog info - %s\n", strerror(errno));
- return ret;
+ prog = skel->progs.xdp_redirect_map_native;
+ tx_port_map = skel->maps.tx_port_native;
+restart:
+ if (sample_install_xdp(prog, ifindex_in, generic, force) < 0) {
+ /* First try with struct bpf_devmap_val as value for generic
+ * mode, then fallback to sizeof(int) for older kernels.
+ */
+ fprintf(stderr,
+ "Trying fallback to sizeof(int) as value_size for devmap in generic mode\n");
+ if (generic && !tried) {
+ prog = skel->progs.xdp_redirect_map_general;
+ tx_port_map = skel->maps.tx_port_general;
+ tried = true;
+ goto restart;
+ }
+ ret = EXIT_FAIL_XDP;
+ goto end_destroy;
}
- prog_id = info.id;
/* Loading dummy XDP prog on out-device */
- if (bpf_set_link_xdp_fd(ifindex_out, dummy_prog_fd,
- (xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST)) < 0) {
- printf("WARN: link set xdp fd failed on %d\n", ifindex_out);
- ifindex_out_xdp_dummy_attached = false;
+ sample_install_xdp(skel->progs.xdp_redirect_dummy_prog, ifindex_out, generic, force);
+
+ devmap_val.ifindex = ifindex_out;
+ if (xdp_devmap_attached)
+ devmap_val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_redirect_map_egress);
+ ret = bpf_map_update_elem(bpf_map__fd(tx_port_map), &key, &devmap_val, 0);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to update devmap value: %s\n",
+ strerror(errno));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
}
- memset(&info, 0, sizeof(info));
- ret = bpf_obj_get_info_by_fd(dummy_prog_fd, &info, &info_len);
- if (ret) {
- printf("can't get prog info - %s\n", strerror(errno));
- return ret;
+ ret = EXIT_FAIL;
+ if (!if_indextoname(ifindex_in, ifname_in)) {
+ fprintf(stderr, "Failed to if_indextoname for %d: %s\n", ifindex_in,
+ strerror(errno));
+ goto end_destroy;
}
- dummy_prog_id = info.id;
- signal(SIGINT, int_exit);
- signal(SIGTERM, int_exit);
-
- /* populate virtual to physical port map */
- ret = bpf_map_update_elem(tx_port_map_fd, &key, &ifindex_out, 0);
- if (ret) {
- perror("bpf_update_elem");
- goto out;
+ if (!if_indextoname(ifindex_out, ifname_out)) {
+ fprintf(stderr, "Failed to if_indextoname for %d: %s\n", ifindex_out,
+ strerror(errno));
+ goto end_destroy;
}
- poll_stats(2, ifindex_out);
+ safe_strncpy(str, get_driver_name(ifindex_in), sizeof(str));
+ printf("Redirecting from %s (ifindex %d; driver %s) to %s (ifindex %d; driver %s)\n",
+ ifname_in, ifindex_in, str, ifname_out, ifindex_out, get_driver_name(ifindex_out));
+ snprintf(str, sizeof(str), "%s->%s", ifname_in, ifname_out);
-out:
- return 0;
+ ret = sample_run(interval, NULL, NULL);
+ if (ret < 0) {
+ fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret));
+ ret = EXIT_FAIL;
+ goto end_destroy;
+ }
+ ret = EXIT_OK;
+end_destroy:
+ xdp_redirect_map__destroy(skel);
+end:
+ sample_exit(ret);
}
diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c
index 9ca2bf457cda..8663dd631b6e 100644
--- a/samples/bpf/xdp_redirect_user.c
+++ b/samples/bpf/xdp_redirect_user.c
@@ -1,6 +1,10 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com>
*/
+static const char *__doc__ =
+"XDP redirect tool, using bpf_redirect helper\n"
+"Usage: xdp_redirect <IFINDEX|IFNAME>_IN <IFINDEX|IFNAME>_OUT\n";
+
#include <linux/bpf.h>
#include <linux/if_link.h>
#include <assert.h>
@@ -13,132 +17,72 @@
#include <net/if.h>
#include <unistd.h>
#include <libgen.h>
-#include <sys/resource.h>
-
-#include "bpf_util.h"
+#include <getopt.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
+#include "bpf_util.h"
+#include "xdp_sample_user.h"
+#include "xdp_redirect.skel.h"
-static int ifindex_in;
-static int ifindex_out;
-static bool ifindex_out_xdp_dummy_attached = true;
-static __u32 prog_id;
-static __u32 dummy_prog_id;
-
-static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
-static int rxcnt_map_fd;
-
-static void int_exit(int sig)
-{
- __u32 curr_prog_id = 0;
-
- if (bpf_get_link_xdp_id(ifindex_in, &curr_prog_id, xdp_flags)) {
- printf("bpf_get_link_xdp_id failed\n");
- exit(1);
- }
- if (prog_id == curr_prog_id)
- bpf_set_link_xdp_fd(ifindex_in, -1, xdp_flags);
- else if (!curr_prog_id)
- printf("couldn't find a prog id on iface IN\n");
- else
- printf("program on iface IN changed, not removing\n");
-
- if (ifindex_out_xdp_dummy_attached) {
- curr_prog_id = 0;
- if (bpf_get_link_xdp_id(ifindex_out, &curr_prog_id,
- xdp_flags)) {
- printf("bpf_get_link_xdp_id failed\n");
- exit(1);
- }
- if (dummy_prog_id == curr_prog_id)
- bpf_set_link_xdp_fd(ifindex_out, -1, xdp_flags);
- else if (!curr_prog_id)
- printf("couldn't find a prog id on iface OUT\n");
- else
- printf("program on iface OUT changed, not removing\n");
- }
- exit(0);
-}
-
-static void poll_stats(int interval, int ifindex)
-{
- unsigned int nr_cpus = bpf_num_possible_cpus();
- __u64 values[nr_cpus], prev[nr_cpus];
-
- memset(prev, 0, sizeof(prev));
-
- while (1) {
- __u64 sum = 0;
- __u32 key = 0;
- int i;
-
- sleep(interval);
- assert(bpf_map_lookup_elem(rxcnt_map_fd, &key, values) == 0);
- for (i = 0; i < nr_cpus; i++)
- sum += (values[i] - prev[i]);
- if (sum)
- printf("ifindex %i: %10llu pkt/s\n",
- ifindex, sum / interval);
- memcpy(prev, values, sizeof(values));
- }
-}
+static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_CNT |
+ SAMPLE_EXCEPTION_CNT | SAMPLE_DEVMAP_XMIT_CNT_MULTI;
-static void usage(const char *prog)
-{
- fprintf(stderr,
- "usage: %s [OPTS] <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n\n"
- "OPTS:\n"
- " -S use skb-mode\n"
- " -N enforce native mode\n"
- " -F force loading prog\n",
- prog);
-}
+DEFINE_SAMPLE_INIT(xdp_redirect);
+static const struct option long_options[] = {
+ {"help", no_argument, NULL, 'h' },
+ {"skb-mode", no_argument, NULL, 'S' },
+ {"force", no_argument, NULL, 'F' },
+ {"stats", no_argument, NULL, 's' },
+ {"interval", required_argument, NULL, 'i' },
+ {"verbose", no_argument, NULL, 'v' },
+ {}
+};
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
- struct bpf_prog_load_attr prog_load_attr = {
- .prog_type = BPF_PROG_TYPE_XDP,
- };
- struct bpf_program *prog, *dummy_prog;
- int prog_fd, tx_port_map_fd, opt;
- struct bpf_prog_info info = {};
- __u32 info_len = sizeof(info);
- const char *optstr = "FSN";
- struct bpf_object *obj;
- char filename[256];
- int dummy_prog_fd;
- int ret, key = 0;
-
- while ((opt = getopt(argc, argv, optstr)) != -1) {
+ int ifindex_in, ifindex_out, opt;
+ char str[2 * IF_NAMESIZE + 1];
+ char ifname_out[IF_NAMESIZE];
+ char ifname_in[IF_NAMESIZE];
+ int ret = EXIT_FAIL_OPTION;
+ unsigned long interval = 2;
+ struct xdp_redirect *skel;
+ bool generic = false;
+ bool force = false;
+ bool error = true;
+
+ while ((opt = getopt_long(argc, argv, "hSFi:vs",
+ long_options, NULL)) != -1) {
switch (opt) {
case 'S':
- xdp_flags |= XDP_FLAGS_SKB_MODE;
- break;
- case 'N':
- /* default, set below */
+ generic = true;
+ mask &= ~(SAMPLE_DEVMAP_XMIT_CNT |
+ SAMPLE_DEVMAP_XMIT_CNT_MULTI);
break;
case 'F':
- xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+ force = true;
+ break;
+ case 'i':
+ interval = strtoul(optarg, NULL, 0);
+ break;
+ case 'v':
+ sample_switch_mode();
break;
+ case 's':
+ mask |= SAMPLE_REDIRECT_CNT;
+ break;
+ case 'h':
+ error = false;
default:
- usage(basename(argv[0]));
- return 1;
+ sample_usage(argv, long_options, __doc__, mask, error);
+ return ret;
}
}
- if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
- xdp_flags |= XDP_FLAGS_DRV_MODE;
-
- if (optind == argc) {
- printf("usage: %s <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n", argv[0]);
- return 1;
- }
-
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK)");
- return 1;
+ if (argc <= optind + 1) {
+ sample_usage(argv, long_options, __doc__, mask, true);
+ return ret;
}
ifindex_in = if_nametoindex(argv[optind]);
@@ -149,75 +93,80 @@ int main(int argc, char **argv)
if (!ifindex_out)
ifindex_out = strtoul(argv[optind + 1], NULL, 0);
- printf("input: %d output: %d\n", ifindex_in, ifindex_out);
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- prog_load_attr.file = filename;
-
- if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
- return 1;
-
- prog = bpf_program__next(NULL, obj);
- dummy_prog = bpf_program__next(prog, obj);
- if (!prog || !dummy_prog) {
- printf("finding a prog in obj file failed\n");
- return 1;
+ if (!ifindex_in || !ifindex_out) {
+ fprintf(stderr, "Bad interface index or name\n");
+ sample_usage(argv, long_options, __doc__, mask, true);
+ goto end;
}
- /* bpf_prog_load_xattr gives us the pointer to first prog's fd,
- * so we're missing only the fd for dummy prog
- */
- dummy_prog_fd = bpf_program__fd(dummy_prog);
- if (prog_fd < 0 || dummy_prog_fd < 0) {
- printf("bpf_prog_load_xattr: %s\n", strerror(errno));
- return 1;
+
+ skel = xdp_redirect__open();
+ if (!skel) {
+ fprintf(stderr, "Failed to xdp_redirect__open: %s\n", strerror(errno));
+ ret = EXIT_FAIL_BPF;
+ goto end;
}
- tx_port_map_fd = bpf_object__find_map_fd_by_name(obj, "tx_port");
- rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt");
- if (tx_port_map_fd < 0 || rxcnt_map_fd < 0) {
- printf("bpf_object__find_map_fd_by_name failed\n");
- return 1;
+ ret = sample_init_pre_load(skel);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
}
- if (bpf_set_link_xdp_fd(ifindex_in, prog_fd, xdp_flags) < 0) {
- printf("ERROR: link set xdp fd failed on %d\n", ifindex_in);
- return 1;
+ skel->rodata->from_match[0] = ifindex_in;
+ skel->rodata->to_match[0] = ifindex_out;
+ skel->rodata->ifindex_out = ifindex_out;
+
+ ret = xdp_redirect__load(skel);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to xdp_redirect__load: %s\n", strerror(errno));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
}
- ret = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
- if (ret) {
- printf("can't get prog info - %s\n", strerror(errno));
- return ret;
+ ret = sample_init(skel, mask);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret));
+ ret = EXIT_FAIL;
+ goto end_destroy;
}
- prog_id = info.id;
+
+ ret = EXIT_FAIL_XDP;
+ if (sample_install_xdp(skel->progs.xdp_redirect_prog, ifindex_in,
+ generic, force) < 0)
+ goto end_destroy;
/* Loading dummy XDP prog on out-device */
- if (bpf_set_link_xdp_fd(ifindex_out, dummy_prog_fd,
- (xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST)) < 0) {
- printf("WARN: link set xdp fd failed on %d\n", ifindex_out);
- ifindex_out_xdp_dummy_attached = false;
+ sample_install_xdp(skel->progs.xdp_redirect_dummy_prog, ifindex_out,
+ generic, force);
+
+ ret = EXIT_FAIL;
+ if (!if_indextoname(ifindex_in, ifname_in)) {
+ fprintf(stderr, "Failed to if_indextoname for %d: %s\n", ifindex_in,
+ strerror(errno));
+ goto end_destroy;
}
- memset(&info, 0, sizeof(info));
- ret = bpf_obj_get_info_by_fd(dummy_prog_fd, &info, &info_len);
- if (ret) {
- printf("can't get prog info - %s\n", strerror(errno));
- return ret;
+ if (!if_indextoname(ifindex_out, ifname_out)) {
+ fprintf(stderr, "Failed to if_indextoname for %d: %s\n", ifindex_out,
+ strerror(errno));
+ goto end_destroy;
}
- dummy_prog_id = info.id;
- signal(SIGINT, int_exit);
- signal(SIGTERM, int_exit);
+ safe_strncpy(str, get_driver_name(ifindex_in), sizeof(str));
+ printf("Redirecting from %s (ifindex %d; driver %s) to %s (ifindex %d; driver %s)\n",
+ ifname_in, ifindex_in, str, ifname_out, ifindex_out, get_driver_name(ifindex_out));
+ snprintf(str, sizeof(str), "%s->%s", ifname_in, ifname_out);
- /* bpf redirect port */
- ret = bpf_map_update_elem(tx_port_map_fd, &key, &ifindex_out, 0);
- if (ret) {
- perror("bpf_update_elem");
- goto out;
+ ret = sample_run(interval, NULL, NULL);
+ if (ret < 0) {
+ fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret));
+ ret = EXIT_FAIL;
+ goto end_destroy;
}
-
- poll_stats(2, ifindex_out);
-
-out:
- return 0;
+ ret = EXIT_OK;
+end_destroy:
+ xdp_redirect__destroy(skel);
+end:
+ sample_exit(ret);
}
diff --git a/samples/bpf/xdp_router_ipv4.bpf.c b/samples/bpf/xdp_router_ipv4.bpf.c
new file mode 100644
index 000000000000..0643330d1d2e
--- /dev/null
+++ b/samples/bpf/xdp_router_ipv4.bpf.c
@@ -0,0 +1,189 @@
+/* Copyright (C) 2017 Cavium, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ */
+
+#include "vmlinux.h"
+#include "xdp_sample.bpf.h"
+#include "xdp_sample_shared.h"
+
+#define ETH_ALEN 6
+#define ETH_P_8021Q 0x8100
+#define ETH_P_8021AD 0x88A8
+
+struct trie_value {
+ __u8 prefix[4];
+ __be64 value;
+ int ifindex;
+ int metric;
+ __be32 gw;
+};
+
+/* Key for lpm_trie */
+union key_4 {
+ u32 b32[2];
+ u8 b8[8];
+};
+
+struct arp_entry {
+ __be64 mac;
+ __be32 dst;
+};
+
+struct direct_map {
+ struct arp_entry arp;
+ int ifindex;
+ __be64 mac;
+};
+
+/* Map for trie implementation */
+struct {
+ __uint(type, BPF_MAP_TYPE_LPM_TRIE);
+ __uint(key_size, 8);
+ __uint(value_size, sizeof(struct trie_value));
+ __uint(max_entries, 50);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} lpm_map SEC(".maps");
+
+/* Map for ARP table */
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __be32);
+ __type(value, __be64);
+ __uint(max_entries, 50);
+} arp_table SEC(".maps");
+
+/* Map to keep the exact match entries in the route table */
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __be32);
+ __type(value, struct direct_map);
+ __uint(max_entries, 50);
+} exact_match SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+ __uint(max_entries, 100);
+} tx_port SEC(".maps");
+
+SEC("xdp")
+int xdp_router_ipv4_prog(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct ethhdr *eth = data;
+ u64 nh_off = sizeof(*eth);
+ struct datarec *rec;
+ __be16 h_proto;
+ u32 key = 0;
+
+ rec = bpf_map_lookup_elem(&rx_cnt, &key);
+ if (rec)
+ NO_TEAR_INC(rec->processed);
+
+ if (data + nh_off > data_end)
+ goto drop;
+
+ h_proto = eth->h_proto;
+ if (h_proto == bpf_htons(ETH_P_8021Q) ||
+ h_proto == bpf_htons(ETH_P_8021AD)) {
+ struct vlan_hdr *vhdr;
+
+ vhdr = data + nh_off;
+ nh_off += sizeof(struct vlan_hdr);
+ if (data + nh_off > data_end)
+ goto drop;
+
+ h_proto = vhdr->h_vlan_encapsulated_proto;
+ }
+
+ switch (bpf_ntohs(h_proto)) {
+ case ETH_P_ARP:
+ if (rec)
+ NO_TEAR_INC(rec->xdp_pass);
+ return XDP_PASS;
+ case ETH_P_IP: {
+ struct iphdr *iph = data + nh_off;
+ struct direct_map *direct_entry;
+ __be64 *dest_mac, *src_mac;
+ int forward_to;
+
+ if (iph + 1 > data_end)
+ goto drop;
+
+ direct_entry = bpf_map_lookup_elem(&exact_match, &iph->daddr);
+
+ /* Check for exact match, this would give a faster lookup */
+ if (direct_entry && direct_entry->mac &&
+ direct_entry->arp.mac) {
+ src_mac = &direct_entry->mac;
+ dest_mac = &direct_entry->arp.mac;
+ forward_to = direct_entry->ifindex;
+ } else {
+ struct trie_value *prefix_value;
+ union key_4 key4;
+
+ /* Look up in the trie for lpm */
+ key4.b32[0] = 32;
+ key4.b8[4] = iph->daddr & 0xff;
+ key4.b8[5] = (iph->daddr >> 8) & 0xff;
+ key4.b8[6] = (iph->daddr >> 16) & 0xff;
+ key4.b8[7] = (iph->daddr >> 24) & 0xff;
+
+ prefix_value = bpf_map_lookup_elem(&lpm_map, &key4);
+ if (!prefix_value)
+ goto drop;
+
+ forward_to = prefix_value->ifindex;
+ src_mac = &prefix_value->value;
+ if (!src_mac)
+ goto drop;
+
+ dest_mac = bpf_map_lookup_elem(&arp_table, &iph->daddr);
+ if (!dest_mac) {
+ if (!prefix_value->gw)
+ goto drop;
+
+ dest_mac = bpf_map_lookup_elem(&arp_table,
+ &prefix_value->gw);
+ if (!dest_mac) {
+ /* Forward the packet to the kernel in
+ * order to trigger ARP discovery for
+ * the default gw.
+ */
+ if (rec)
+ NO_TEAR_INC(rec->xdp_pass);
+ return XDP_PASS;
+ }
+ }
+ }
+
+ if (src_mac && dest_mac) {
+ int ret;
+
+ __builtin_memcpy(eth->h_dest, dest_mac, ETH_ALEN);
+ __builtin_memcpy(eth->h_source, src_mac, ETH_ALEN);
+
+ ret = bpf_redirect_map(&tx_port, forward_to, 0);
+ if (ret == XDP_REDIRECT) {
+ if (rec)
+ NO_TEAR_INC(rec->xdp_redirect);
+ return ret;
+ }
+ }
+ }
+ default:
+ break;
+ }
+drop:
+ if (rec)
+ NO_TEAR_INC(rec->xdp_drop);
+
+ return XDP_DROP;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_router_ipv4_kern.c b/samples/bpf/xdp_router_ipv4_kern.c
deleted file mode 100644
index b37ca2b13063..000000000000
--- a/samples/bpf/xdp_router_ipv4_kern.c
+++ /dev/null
@@ -1,186 +0,0 @@
-/* Copyright (C) 2017 Cavium, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License
- * as published by the Free Software Foundation.
- */
-#define KBUILD_MODNAME "foo"
-#include <uapi/linux/bpf.h>
-#include <linux/in.h>
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-#include <linux/if_vlan.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <bpf/bpf_helpers.h>
-#include <linux/slab.h>
-#include <net/ip_fib.h>
-
-struct trie_value {
- __u8 prefix[4];
- __be64 value;
- int ifindex;
- int metric;
- __be32 gw;
-};
-
-/* Key for lpm_trie*/
-union key_4 {
- u32 b32[2];
- u8 b8[8];
-};
-
-struct arp_entry {
- __be64 mac;
- __be32 dst;
-};
-
-struct direct_map {
- struct arp_entry arp;
- int ifindex;
- __be64 mac;
-};
-
-/* Map for trie implementation*/
-struct {
- __uint(type, BPF_MAP_TYPE_LPM_TRIE);
- __uint(key_size, 8);
- __uint(value_size, sizeof(struct trie_value));
- __uint(max_entries, 50);
- __uint(map_flags, BPF_F_NO_PREALLOC);
-} lpm_map SEC(".maps");
-
-/* Map for counter*/
-struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, u64);
- __uint(max_entries, 256);
-} rxcnt SEC(".maps");
-
-/* Map for ARP table*/
-struct {
- __uint(type, BPF_MAP_TYPE_HASH);
- __type(key, __be32);
- __type(value, __be64);
- __uint(max_entries, 50);
-} arp_table SEC(".maps");
-
-/* Map to keep the exact match entries in the route table*/
-struct {
- __uint(type, BPF_MAP_TYPE_HASH);
- __type(key, __be32);
- __type(value, struct direct_map);
- __uint(max_entries, 50);
-} exact_match SEC(".maps");
-
-struct {
- __uint(type, BPF_MAP_TYPE_DEVMAP);
- __uint(key_size, sizeof(int));
- __uint(value_size, sizeof(int));
- __uint(max_entries, 100);
-} tx_port SEC(".maps");
-
-/* Function to set source and destination mac of the packet */
-static inline void set_src_dst_mac(void *data, void *src, void *dst)
-{
- unsigned short *source = src;
- unsigned short *dest = dst;
- unsigned short *p = data;
-
- __builtin_memcpy(p, dest, 6);
- __builtin_memcpy(p + 3, source, 6);
-}
-
-/* Parse IPV4 packet to get SRC, DST IP and protocol */
-static inline int parse_ipv4(void *data, u64 nh_off, void *data_end,
- __be32 *src, __be32 *dest)
-{
- struct iphdr *iph = data + nh_off;
-
- if (iph + 1 > data_end)
- return 0;
- *src = iph->saddr;
- *dest = iph->daddr;
- return iph->protocol;
-}
-
-SEC("xdp_router_ipv4")
-int xdp_router_ipv4_prog(struct xdp_md *ctx)
-{
- void *data_end = (void *)(long)ctx->data_end;
- __be64 *dest_mac = NULL, *src_mac = NULL;
- void *data = (void *)(long)ctx->data;
- struct trie_value *prefix_value;
- int rc = XDP_DROP, forward_to;
- struct ethhdr *eth = data;
- union key_4 key4;
- long *value;
- u16 h_proto;
- u32 ipproto;
- u64 nh_off;
-
- nh_off = sizeof(*eth);
- if (data + nh_off > data_end)
- return rc;
-
- h_proto = eth->h_proto;
-
- if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
- struct vlan_hdr *vhdr;
-
- vhdr = data + nh_off;
- nh_off += sizeof(struct vlan_hdr);
- if (data + nh_off > data_end)
- return rc;
- h_proto = vhdr->h_vlan_encapsulated_proto;
- }
- if (h_proto == htons(ETH_P_ARP)) {
- return XDP_PASS;
- } else if (h_proto == htons(ETH_P_IP)) {
- struct direct_map *direct_entry;
- __be32 src_ip = 0, dest_ip = 0;
-
- ipproto = parse_ipv4(data, nh_off, data_end, &src_ip, &dest_ip);
- direct_entry = bpf_map_lookup_elem(&exact_match, &dest_ip);
- /* Check for exact match, this would give a faster lookup*/
- if (direct_entry && direct_entry->mac && direct_entry->arp.mac) {
- src_mac = &direct_entry->mac;
- dest_mac = &direct_entry->arp.mac;
- forward_to = direct_entry->ifindex;
- } else {
- /* Look up in the trie for lpm*/
- key4.b32[0] = 32;
- key4.b8[4] = dest_ip & 0xff;
- key4.b8[5] = (dest_ip >> 8) & 0xff;
- key4.b8[6] = (dest_ip >> 16) & 0xff;
- key4.b8[7] = (dest_ip >> 24) & 0xff;
- prefix_value = bpf_map_lookup_elem(&lpm_map, &key4);
- if (!prefix_value)
- return XDP_DROP;
- src_mac = &prefix_value->value;
- if (!src_mac)
- return XDP_DROP;
- dest_mac = bpf_map_lookup_elem(&arp_table, &dest_ip);
- if (!dest_mac) {
- if (!prefix_value->gw)
- return XDP_DROP;
- dest_ip = prefix_value->gw;
- dest_mac = bpf_map_lookup_elem(&arp_table, &dest_ip);
- }
- forward_to = prefix_value->ifindex;
- }
- } else {
- ipproto = 0;
- }
- if (src_mac && dest_mac) {
- set_src_dst_mac(data, src_mac, dest_mac);
- value = bpf_map_lookup_elem(&rxcnt, &ipproto);
- if (value)
- *value += 1;
- return bpf_redirect_map(&tx_port, forward_to, 0);
- }
- return rc;
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c
index c2da1b51ff95..683913bbf279 100644
--- a/samples/bpf/xdp_router_ipv4_user.c
+++ b/samples/bpf/xdp_router_ipv4_user.c
@@ -22,72 +22,41 @@
#include <sys/syscall.h>
#include "bpf_util.h"
#include <bpf/libbpf.h>
-#include <sys/resource.h>
#include <libgen.h>
+#include <getopt.h>
+#include <pthread.h>
+#include "xdp_sample_user.h"
+#include "xdp_router_ipv4.skel.h"
-int sock, sock_arp, flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
-static int total_ifindex;
-static int *ifindex_list;
-static __u32 *prog_id_list;
-char buf[8192];
+static const char *__doc__ =
+"XDP IPv4 router implementation\n"
+"Usage: xdp_router_ipv4 <IFNAME-0> ... <IFNAME-N>\n";
+
+static char buf[8192];
static int lpm_map_fd;
-static int rxcnt_map_fd;
static int arp_table_map_fd;
static int exact_match_map_fd;
static int tx_port_map_fd;
-static int get_route_table(int rtm_family);
-static void int_exit(int sig)
-{
- __u32 prog_id = 0;
- int i = 0;
+static bool routes_thread_exit;
+static int interval = 5;
- for (i = 0; i < total_ifindex; i++) {
- if (bpf_get_link_xdp_id(ifindex_list[i], &prog_id, flags)) {
- printf("bpf_get_link_xdp_id on iface %d failed\n",
- ifindex_list[i]);
- exit(1);
- }
- if (prog_id_list[i] == prog_id)
- bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
- else if (!prog_id)
- printf("couldn't find a prog id on iface %d\n",
- ifindex_list[i]);
- else
- printf("program on iface %d changed, not removing\n",
- ifindex_list[i]);
- prog_id = 0;
- }
- exit(0);
-}
+static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT |
+ SAMPLE_DEVMAP_XMIT_CNT_MULTI | SAMPLE_EXCEPTION_CNT;
-static void close_and_exit(int sig)
-{
- close(sock);
- close(sock_arp);
+DEFINE_SAMPLE_INIT(xdp_router_ipv4);
- int_exit(0);
-}
+static const struct option long_options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "skb-mode", no_argument, NULL, 'S' },
+ { "force", no_argument, NULL, 'F' },
+ { "interval", required_argument, NULL, 'i' },
+ { "verbose", no_argument, NULL, 'v' },
+ { "stats", no_argument, NULL, 's' },
+ {}
+};
-/* Get the mac address of the interface given interface name */
-static __be64 getmac(char *iface)
-{
- struct ifreq ifr;
- __be64 mac = 0;
- int fd, i;
-
- fd = socket(AF_INET, SOCK_DGRAM, 0);
- ifr.ifr_addr.sa_family = AF_INET;
- strncpy(ifr.ifr_name, iface, IFNAMSIZ - 1);
- if (ioctl(fd, SIOCGIFHWADDR, &ifr) < 0) {
- printf("ioctl failed leaving....\n");
- return -1;
- }
- for (i = 0; i < 6 ; i++)
- *((__u8 *)&mac + i) = (__u8)ifr.ifr_hwaddr.sa_data[i];
- close(fd);
- return mac;
-}
+static int get_route_table(int rtm_family);
static int recv_msg(struct sockaddr_nl sock_addr, int sock)
{
@@ -130,7 +99,6 @@ static void read_route(struct nlmsghdr *nh, int nll)
int i;
struct route_table {
int dst_len, iface, metric;
- char *iface_name;
__be32 dst, gw;
__be64 mac;
} route;
@@ -145,17 +113,7 @@ static void read_route(struct nlmsghdr *nh, int nll)
__be64 mac;
} direct_entry;
- if (nh->nlmsg_type == RTM_DELROUTE)
- printf("DELETING Route entry\n");
- else if (nh->nlmsg_type == RTM_GETROUTE)
- printf("READING Route entry\n");
- else if (nh->nlmsg_type == RTM_NEWROUTE)
- printf("NEW Route entry\n");
- else
- printf("%d\n", nh->nlmsg_type);
-
memset(&route, 0, sizeof(route));
- printf("Destination\t\tGateway\t\tGenmask\t\tMetric\t\tIface\n");
for (; NLMSG_OK(nh, nll); nh = NLMSG_NEXT(nh, nll)) {
rt_msg = (struct rtmsg *)NLMSG_DATA(nh);
rtm_family = rt_msg->rtm_family;
@@ -192,11 +150,7 @@ static void read_route(struct nlmsghdr *nh, int nll)
route.gw = atoi(gws);
route.iface = atoi(ifs);
route.metric = atoi(metrics);
- route.iface_name = alloca(sizeof(char *) * IFNAMSIZ);
- route.iface_name = if_indextoname(route.iface, route.iface_name);
- route.mac = getmac(route.iface_name);
- if (route.mac == -1)
- int_exit(0);
+ assert(get_mac_addr(route.iface, &route.mac) == 0);
assert(bpf_map_update_elem(tx_port_map_fd,
&route.iface, &route.iface, 0) == 0);
if (rtm_family == AF_INET) {
@@ -234,14 +188,6 @@ static void read_route(struct nlmsghdr *nh, int nll)
for (i = 0; i < 4; i++)
prefix_key->data[i] = (route.dst >> i * 8) & 0xff;
- printf("%3d.%d.%d.%d\t\t%3x\t\t%d\t\t%d\t\t%s\n",
- (int)prefix_key->data[0],
- (int)prefix_key->data[1],
- (int)prefix_key->data[2],
- (int)prefix_key->data[3],
- route.gw, route.dst_len,
- route.metric,
- route.iface_name);
if (bpf_map_lookup_elem(lpm_map_fd, prefix_key,
prefix_value) < 0) {
for (i = 0; i < 4; i++)
@@ -257,20 +203,13 @@ static void read_route(struct nlmsghdr *nh, int nll)
) == 0);
} else {
if (nh->nlmsg_type == RTM_DELROUTE) {
- printf("deleting entry\n");
- printf("prefix key=%d.%d.%d.%d/%d",
- prefix_key->data[0],
- prefix_key->data[1],
- prefix_key->data[2],
- prefix_key->data[3],
- prefix_key->prefixlen);
assert(bpf_map_delete_elem(lpm_map_fd,
prefix_key
) == 0);
/* Rereading the route table to check if
* there is an entry with the same
* prefix but a different metric as the
- * deleted enty.
+ * deleted entry.
*/
get_route_table(AF_INET);
} else if (prefix_key->data[0] ==
@@ -327,14 +266,14 @@ static int get_route_table(int rtm_family)
sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
if (sock < 0) {
- printf("open netlink socket: %s\n", strerror(errno));
- return -1;
+ fprintf(stderr, "open netlink socket: %s\n", strerror(errno));
+ return -errno;
}
memset(&sa, 0, sizeof(sa));
sa.nl_family = AF_NETLINK;
if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
- printf("bind to netlink: %s\n", strerror(errno));
- ret = -1;
+ fprintf(stderr, "bind netlink socket: %s\n", strerror(errno));
+ ret = -errno;
goto cleanup;
}
memset(&req, 0, sizeof(req));
@@ -353,15 +292,15 @@ static int get_route_table(int rtm_family)
msg.msg_iovlen = 1;
ret = sendmsg(sock, &msg, 0);
if (ret < 0) {
- printf("send to netlink: %s\n", strerror(errno));
- ret = -1;
+ fprintf(stderr, "send to netlink: %s\n", strerror(errno));
+ ret = -errno;
goto cleanup;
}
memset(buf, 0, sizeof(buf));
nll = recv_msg(sa, sock);
if (nll < 0) {
- printf("recv from netlink: %s\n", strerror(nll));
- ret = -1;
+ fprintf(stderr, "recv from netlink: %s\n", strerror(nll));
+ ret = nll;
goto cleanup;
}
nh = (struct nlmsghdr *)buf;
@@ -391,9 +330,6 @@ static void read_arp(struct nlmsghdr *nh, int nll)
__be64 mac;
} direct_entry;
- if (nh->nlmsg_type == RTM_GETNEIGH)
- printf("READING arp entry\n");
- printf("Address\tHwAddress\n");
for (; NLMSG_OK(nh, nll); nh = NLMSG_NEXT(nh, nll)) {
rt_msg = (struct ndmsg *)NLMSG_DATA(nh);
rt_attr = (struct rtattr *)RTM_RTA(rt_msg);
@@ -415,7 +351,7 @@ static void read_arp(struct nlmsghdr *nh, int nll)
}
arp_entry.dst = atoi(dsts);
arp_entry.mac = atol(mac);
- printf("%x\t\t%llx\n", arp_entry.dst, arp_entry.mac);
+
if (ndm_family == AF_INET) {
if (bpf_map_lookup_elem(exact_match_map_fd,
&arp_entry.dst,
@@ -466,14 +402,14 @@ static int get_arp_table(int rtm_family)
sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
if (sock < 0) {
- printf("open netlink socket: %s\n", strerror(errno));
- return -1;
+ fprintf(stderr, "open netlink socket: %s\n", strerror(errno));
+ return -errno;
}
memset(&sa, 0, sizeof(sa));
sa.nl_family = AF_NETLINK;
if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
- printf("bind to netlink: %s\n", strerror(errno));
- ret = -1;
+ fprintf(stderr, "bind netlink socket: %s\n", strerror(errno));
+ ret = -errno;
goto cleanup;
}
memset(&req, 0, sizeof(req));
@@ -491,15 +427,15 @@ static int get_arp_table(int rtm_family)
msg.msg_iovlen = 1;
ret = sendmsg(sock, &msg, 0);
if (ret < 0) {
- printf("send to netlink: %s\n", strerror(errno));
- ret = -1;
+ fprintf(stderr, "send to netlink: %s\n", strerror(errno));
+ ret = -errno;
goto cleanup;
}
memset(buf, 0, sizeof(buf));
nll = recv_msg(sa, sock);
if (nll < 0) {
- printf("recv from netlink: %s\n", strerror(nll));
- ret = -1;
+ fprintf(stderr, "recv from netlink: %s\n", strerror(nll));
+ ret = nll;
goto cleanup;
}
nh = (struct nlmsghdr *)buf;
@@ -512,24 +448,17 @@ cleanup:
/* Function to keep track and update changes in route and arp table
* Give regular statistics of packets forwarded
*/
-static int monitor_route(void)
+static void *monitor_routes_thread(void *arg)
{
- unsigned int nr_cpus = bpf_num_possible_cpus();
- const unsigned int nr_keys = 256;
struct pollfd fds_route, fds_arp;
- __u64 prev[nr_keys][nr_cpus];
struct sockaddr_nl la, lr;
- __u64 values[nr_cpus];
+ int sock, sock_arp, nll;
struct nlmsghdr *nh;
- int nll, ret = 0;
- int interval = 5;
- __u32 key;
- int i;
sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
if (sock < 0) {
- printf("open netlink socket: %s\n", strerror(errno));
- return -1;
+ fprintf(stderr, "open netlink socket: %s\n", strerror(errno));
+ return NULL;
}
fcntl(sock, F_SETFL, O_NONBLOCK);
@@ -537,17 +466,19 @@ static int monitor_route(void)
lr.nl_family = AF_NETLINK;
lr.nl_groups = RTMGRP_IPV6_ROUTE | RTMGRP_IPV4_ROUTE | RTMGRP_NOTIFY;
if (bind(sock, (struct sockaddr *)&lr, sizeof(lr)) < 0) {
- printf("bind to netlink: %s\n", strerror(errno));
- ret = -1;
- goto cleanup;
+ fprintf(stderr, "bind netlink socket: %s\n", strerror(errno));
+ close(sock);
+ return NULL;
}
+
fds_route.fd = sock;
fds_route.events = POLL_IN;
sock_arp = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
if (sock_arp < 0) {
- printf("open netlink socket: %s\n", strerror(errno));
- return -1;
+ fprintf(stderr, "open netlink socket: %s\n", strerror(errno));
+ close(sock);
+ return NULL;
}
fcntl(sock_arp, F_SETFL, O_NONBLOCK);
@@ -555,51 +486,44 @@ static int monitor_route(void)
la.nl_family = AF_NETLINK;
la.nl_groups = RTMGRP_NEIGH | RTMGRP_NOTIFY;
if (bind(sock_arp, (struct sockaddr *)&la, sizeof(la)) < 0) {
- printf("bind to netlink: %s\n", strerror(errno));
- ret = -1;
+ fprintf(stderr, "bind netlink socket: %s\n", strerror(errno));
goto cleanup;
}
+
fds_arp.fd = sock_arp;
fds_arp.events = POLL_IN;
- memset(prev, 0, sizeof(prev));
- do {
- signal(SIGINT, close_and_exit);
- signal(SIGTERM, close_and_exit);
+ /* dump route and arp tables */
+ if (get_arp_table(AF_INET) < 0) {
+ fprintf(stderr, "Failed reading arp table\n");
+ goto cleanup;
+ }
- sleep(interval);
- for (key = 0; key < nr_keys; key++) {
- __u64 sum = 0;
-
- assert(bpf_map_lookup_elem(rxcnt_map_fd,
- &key, values) == 0);
- for (i = 0; i < nr_cpus; i++)
- sum += (values[i] - prev[key][i]);
- if (sum)
- printf("proto %u: %10llu pkt/s\n",
- key, sum / interval);
- memcpy(prev[key], values, sizeof(values));
- }
+ if (get_route_table(AF_INET) < 0) {
+ fprintf(stderr, "Failed reading route table\n");
+ goto cleanup;
+ }
+ while (!routes_thread_exit) {
memset(buf, 0, sizeof(buf));
if (poll(&fds_route, 1, 3) == POLL_IN) {
nll = recv_msg(lr, sock);
if (nll < 0) {
- printf("recv from netlink: %s\n", strerror(nll));
- ret = -1;
+ fprintf(stderr, "recv from netlink: %s\n",
+ strerror(nll));
goto cleanup;
}
nh = (struct nlmsghdr *)buf;
- printf("Routing table updated.\n");
read_route(nh, nll);
}
+
memset(buf, 0, sizeof(buf));
if (poll(&fds_arp, 1, 3) == POLL_IN) {
nll = recv_msg(la, sock_arp);
if (nll < 0) {
- printf("recv from netlink: %s\n", strerror(nll));
- ret = -1;
+ fprintf(stderr, "recv from netlink: %s\n",
+ strerror(nll));
goto cleanup;
}
@@ -607,135 +531,169 @@ static int monitor_route(void)
read_arp(nh, nll);
}
- } while (1);
+ sleep(interval);
+ }
+
cleanup:
+ close(sock_arp);
close(sock);
- return ret;
+ return NULL;
}
-static void usage(const char *prog)
+static void usage(char *argv[], const struct option *long_options,
+ const char *doc, int mask, bool error,
+ struct bpf_object *obj)
{
- fprintf(stderr,
- "%s: %s [OPTS] interface name list\n\n"
- "OPTS:\n"
- " -S use skb-mode\n"
- " -F force loading prog\n",
- __func__, prog);
+ sample_usage(argv, long_options, doc, mask, error);
}
-int main(int ac, char **argv)
+int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
- struct bpf_prog_load_attr prog_load_attr = {
- .prog_type = BPF_PROG_TYPE_XDP,
- };
- struct bpf_prog_info info = {};
- __u32 info_len = sizeof(info);
- const char *optstr = "SF";
- struct bpf_object *obj;
- char filename[256];
- char **ifname_list;
- int prog_fd, opt;
- int err, i = 1;
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- prog_load_attr.file = filename;
-
- total_ifindex = ac - 1;
- ifname_list = (argv + 1);
-
- while ((opt = getopt(ac, argv, optstr)) != -1) {
+ bool error = true, generic = false, force = false;
+ int opt, ret = EXIT_FAIL_BPF;
+ struct xdp_router_ipv4 *skel;
+ int i, total_ifindex = argc - 1;
+ char **ifname_list = argv + 1;
+ pthread_t routes_thread;
+ int longindex = 0;
+
+ if (libbpf_set_strict_mode(LIBBPF_STRICT_ALL) < 0) {
+ fprintf(stderr, "Failed to set libbpf strict mode: %s\n",
+ strerror(errno));
+ goto end;
+ }
+
+ skel = xdp_router_ipv4__open();
+ if (!skel) {
+ fprintf(stderr, "Failed to xdp_router_ipv4__open: %s\n",
+ strerror(errno));
+ goto end;
+ }
+
+ ret = sample_init_pre_load(skel);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to sample_init_pre_load: %s\n",
+ strerror(-ret));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
+ }
+
+ ret = xdp_router_ipv4__load(skel);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to xdp_router_ipv4__load: %s\n",
+ strerror(errno));
+ goto end_destroy;
+ }
+
+ ret = sample_init(skel, mask);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret));
+ ret = EXIT_FAIL;
+ goto end_destroy;
+ }
+
+ while ((opt = getopt_long(argc, argv, "si:SFvh",
+ long_options, &longindex)) != -1) {
switch (opt) {
+ case 's':
+ mask |= SAMPLE_REDIRECT_MAP_CNT;
+ total_ifindex--;
+ ifname_list++;
+ break;
+ case 'i':
+ interval = strtoul(optarg, NULL, 0);
+ total_ifindex -= 2;
+ ifname_list += 2;
+ break;
case 'S':
- flags |= XDP_FLAGS_SKB_MODE;
+ generic = true;
total_ifindex--;
ifname_list++;
break;
case 'F':
- flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+ force = true;
total_ifindex--;
ifname_list++;
break;
+ case 'v':
+ sample_switch_mode();
+ total_ifindex--;
+ ifname_list++;
+ break;
+ case 'h':
+ error = false;
default:
- usage(basename(argv[0]));
- return 1;
+ usage(argv, long_options, __doc__, mask, error, skel->obj);
+ goto end_destroy;
}
}
- if (!(flags & XDP_FLAGS_SKB_MODE))
- flags |= XDP_FLAGS_DRV_MODE;
-
- if (optind == ac) {
- usage(basename(argv[0]));
- return 1;
+ ret = EXIT_FAIL_OPTION;
+ if (optind == argc) {
+ usage(argv, long_options, __doc__, mask, true, skel->obj);
+ goto end_destroy;
}
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK)");
- return 1;
+ lpm_map_fd = bpf_map__fd(skel->maps.lpm_map);
+ if (lpm_map_fd < 0) {
+ fprintf(stderr, "Failed loading lpm_map %s\n",
+ strerror(-lpm_map_fd));
+ goto end_destroy;
}
-
- if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
- return 1;
-
- printf("\n**************loading bpf file*********************\n\n\n");
- if (!prog_fd) {
- printf("bpf_prog_load_xattr: %s\n", strerror(errno));
- return 1;
+ arp_table_map_fd = bpf_map__fd(skel->maps.arp_table);
+ if (arp_table_map_fd < 0) {
+ fprintf(stderr, "Failed loading arp_table_map_fd %s\n",
+ strerror(-arp_table_map_fd));
+ goto end_destroy;
}
-
- lpm_map_fd = bpf_object__find_map_fd_by_name(obj, "lpm_map");
- rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt");
- arp_table_map_fd = bpf_object__find_map_fd_by_name(obj, "arp_table");
- exact_match_map_fd = bpf_object__find_map_fd_by_name(obj,
- "exact_match");
- tx_port_map_fd = bpf_object__find_map_fd_by_name(obj, "tx_port");
- if (lpm_map_fd < 0 || rxcnt_map_fd < 0 || arp_table_map_fd < 0 ||
- exact_match_map_fd < 0 || tx_port_map_fd < 0) {
- printf("bpf_object__find_map_fd_by_name failed\n");
- return 1;
+ exact_match_map_fd = bpf_map__fd(skel->maps.exact_match);
+ if (exact_match_map_fd < 0) {
+ fprintf(stderr, "Failed loading exact_match_map_fd %s\n",
+ strerror(-exact_match_map_fd));
+ goto end_destroy;
}
-
- ifindex_list = (int *)calloc(total_ifindex, sizeof(int *));
- for (i = 0; i < total_ifindex; i++) {
- ifindex_list[i] = if_nametoindex(ifname_list[i]);
- if (!ifindex_list[i]) {
- printf("Couldn't translate interface name: %s",
- strerror(errno));
- return 1;
- }
+ tx_port_map_fd = bpf_map__fd(skel->maps.tx_port);
+ if (tx_port_map_fd < 0) {
+ fprintf(stderr, "Failed loading tx_port_map_fd %s\n",
+ strerror(-tx_port_map_fd));
+ goto end_destroy;
}
- prog_id_list = (__u32 *)calloc(total_ifindex, sizeof(__u32 *));
- for (i = 0; i < total_ifindex; i++) {
- if (bpf_set_link_xdp_fd(ifindex_list[i], prog_fd, flags) < 0) {
- printf("link set xdp fd failed\n");
- int recovery_index = i;
- for (i = 0; i < recovery_index; i++)
- bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
+ ret = EXIT_FAIL_XDP;
+ for (i = 0; i < total_ifindex; i++) {
+ int index = if_nametoindex(ifname_list[i]);
- return 1;
+ if (!index) {
+ fprintf(stderr, "Interface %s not found %s\n",
+ ifname_list[i], strerror(-tx_port_map_fd));
+ goto end_destroy;
}
- err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
- if (err) {
- printf("can't get prog info - %s\n", strerror(errno));
- return err;
- }
- prog_id_list[i] = info.id;
- memset(&info, 0, sizeof(info));
- printf("Attached to %d\n", ifindex_list[i]);
+ if (sample_install_xdp(skel->progs.xdp_router_ipv4_prog,
+ index, generic, force) < 0)
+ goto end_destroy;
}
- signal(SIGINT, int_exit);
- signal(SIGTERM, int_exit);
- printf("*******************ROUTE TABLE*************************\n\n\n");
- get_route_table(AF_INET);
- printf("*******************ARP TABLE***************************\n\n\n");
- get_arp_table(AF_INET);
- if (monitor_route() < 0) {
- printf("Error in receiving route update");
- return 1;
+ ret = pthread_create(&routes_thread, NULL, monitor_routes_thread, NULL);
+ if (ret) {
+ fprintf(stderr, "Failed creating routes_thread: %s\n", strerror(-ret));
+ ret = EXIT_FAIL;
+ goto end_destroy;
}
- return 0;
+ ret = sample_run(interval, NULL, NULL);
+ routes_thread_exit = true;
+
+ if (ret < 0) {
+ fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret));
+ ret = EXIT_FAIL;
+ goto end_thread_wait;
+ }
+ ret = EXIT_OK;
+
+end_thread_wait:
+ pthread_join(routes_thread, NULL);
+end_destroy:
+ xdp_router_ipv4__destroy(skel);
+end:
+ sample_exit(ret);
}
diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c
index 4fe47502ebed..08f5331d2b00 100644
--- a/samples/bpf/xdp_rxq_info_user.c
+++ b/samples/bpf/xdp_rxq_info_user.c
@@ -14,11 +14,10 @@ static const char *__doc__ = " XDP RX-queue info extract example\n\n"
#include <string.h>
#include <unistd.h>
#include <locale.h>
-#include <sys/resource.h>
#include <getopt.h>
#include <net/if.h>
#include <time.h>
-
+#include <limits.h>
#include <arpa/inet.h>
#include <linux/if_link.h>
@@ -44,6 +43,9 @@ static struct bpf_map *rx_queue_index_map;
#define EXIT_FAIL_BPF 4
#define EXIT_FAIL_MEM 5
+#define FAIL_MEM_SIG INT_MAX
+#define FAIL_STAT_SIG (INT_MAX - 1)
+
static const struct option long_options[] = {
{"help", no_argument, NULL, 'h' },
{"dev", required_argument, NULL, 'd' },
@@ -62,21 +64,27 @@ static void int_exit(int sig)
__u32 curr_prog_id = 0;
if (ifindex > -1) {
- if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
- printf("bpf_get_link_xdp_id failed\n");
+ if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) {
+ printf("bpf_xdp_query_id failed\n");
exit(EXIT_FAIL);
}
if (prog_id == curr_prog_id) {
fprintf(stderr,
"Interrupted: Removing XDP program on ifindex:%d device:%s\n",
ifindex, ifname);
- bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+ bpf_xdp_detach(ifindex, xdp_flags, NULL);
} else if (!curr_prog_id) {
printf("couldn't find a prog id on a given iface\n");
} else {
printf("program on interface changed, not removing\n");
}
}
+
+ if (sig == FAIL_MEM_SIG)
+ exit(EXIT_FAIL_MEM);
+ else if (sig == FAIL_STAT_SIG)
+ exit(EXIT_FAIL);
+
exit(EXIT_OK);
}
@@ -141,7 +149,8 @@ static char* options2str(enum cfg_options_flags flag)
if (flag & READ_MEM)
return "read";
fprintf(stderr, "ERR: Unknown config option flags");
- exit(EXIT_FAIL);
+ int_exit(FAIL_STAT_SIG);
+ return "unknown";
}
static void usage(char *argv[])
@@ -174,7 +183,7 @@ static __u64 gettime(void)
res = clock_gettime(CLOCK_MONOTONIC, &t);
if (res < 0) {
fprintf(stderr, "Error with gettimeofday! (%i)\n", res);
- exit(EXIT_FAIL);
+ int_exit(FAIL_STAT_SIG);
}
return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
}
@@ -198,45 +207,38 @@ static struct datarec *alloc_record_per_cpu(void)
{
unsigned int nr_cpus = bpf_num_possible_cpus();
struct datarec *array;
- size_t size;
- size = sizeof(struct datarec) * nr_cpus;
- array = malloc(size);
- memset(array, 0, size);
+ array = calloc(nr_cpus, sizeof(struct datarec));
if (!array) {
fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus);
- exit(EXIT_FAIL_MEM);
+ int_exit(FAIL_MEM_SIG);
}
return array;
}
static struct record *alloc_record_per_rxq(void)
{
- unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
+ unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map);
struct record *array;
- size_t size;
- size = sizeof(struct record) * nr_rxqs;
- array = malloc(size);
- memset(array, 0, size);
+ array = calloc(nr_rxqs, sizeof(struct record));
if (!array) {
fprintf(stderr, "Mem alloc error (nr_rxqs:%u)\n", nr_rxqs);
- exit(EXIT_FAIL_MEM);
+ int_exit(FAIL_MEM_SIG);
}
return array;
}
static struct stats_record *alloc_stats_record(void)
{
- unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
+ unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map);
struct stats_record *rec;
int i;
- rec = malloc(sizeof(*rec));
- memset(rec, 0, sizeof(*rec));
+ rec = calloc(1, sizeof(struct stats_record));
if (!rec) {
fprintf(stderr, "Mem alloc error\n");
- exit(EXIT_FAIL_MEM);
+ int_exit(FAIL_MEM_SIG);
}
rec->rxq = alloc_record_per_rxq();
for (i = 0; i < nr_rxqs; i++)
@@ -248,7 +250,7 @@ static struct stats_record *alloc_stats_record(void)
static void free_stats_record(struct stats_record *r)
{
- unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
+ unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map);
int i;
for (i = 0; i < nr_rxqs; i++)
@@ -296,7 +298,7 @@ static void stats_collect(struct stats_record *rec)
map_collect_percpu(fd, 0, &rec->stats);
fd = bpf_map__fd(rx_queue_index_map);
- max_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
+ max_rxqs = bpf_map__max_entries(rx_queue_index_map);
for (i = 0; i < max_rxqs; i++)
map_collect_percpu(fd, i, &rec->rxq[i]);
}
@@ -342,7 +344,7 @@ static void stats_print(struct stats_record *stats_rec,
struct stats_record *stats_prev,
int action, __u32 cfg_opt)
{
- unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
+ unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map);
unsigned int nr_cpus = bpf_num_possible_cpus();
double pps = 0, err = 0;
struct record *rec, *prev;
@@ -457,15 +459,12 @@ static void stats_poll(int interval, int action, __u32 cfg_opt)
int main(int argc, char **argv)
{
__u32 cfg_options= NO_TOUCH ; /* Default: Don't touch packet memory */
- struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
- struct bpf_prog_load_attr prog_load_attr = {
- .prog_type = BPF_PROG_TYPE_XDP,
- };
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
int prog_fd, map_fd, opt, err;
bool use_separators = true;
struct config cfg = { 0 };
+ struct bpf_program *prog;
struct bpf_object *obj;
struct bpf_map *map;
char filename[256];
@@ -479,15 +478,18 @@ int main(int argc, char **argv)
char *action_str = NULL;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- prog_load_attr.file = filename;
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK)");
- return 1;
- }
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj))
+ return EXIT_FAIL;
+
+ prog = bpf_object__next_program(obj, NULL);
+ bpf_program__set_type(prog, BPF_PROG_TYPE_XDP);
- if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
+ err = bpf_object__load(obj);
+ if (err)
return EXIT_FAIL;
+ prog_fd = bpf_program__fd(prog);
map = bpf_object__find_map_by_name(obj, "config_map");
stats_global_map = bpf_object__find_map_by_name(obj, "stats_global_map");
@@ -595,7 +597,7 @@ int main(int argc, char **argv)
signal(SIGINT, int_exit);
signal(SIGTERM, int_exit);
- if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
+ if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) {
fprintf(stderr, "link set xdp fd failed\n");
return EXIT_FAIL_XDP;
}
diff --git a/samples/bpf/xdp_sample.bpf.c b/samples/bpf/xdp_sample.bpf.c
new file mode 100644
index 000000000000..0eb7e1dcae22
--- /dev/null
+++ b/samples/bpf/xdp_sample.bpf.c
@@ -0,0 +1,266 @@
+// SPDX-License-Identifier: GPL-2.0
+/* GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. */
+#include "xdp_sample.bpf.h"
+
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+
+array_map rx_cnt SEC(".maps");
+array_map redir_err_cnt SEC(".maps");
+array_map cpumap_enqueue_cnt SEC(".maps");
+array_map cpumap_kthread_cnt SEC(".maps");
+array_map exception_cnt SEC(".maps");
+array_map devmap_xmit_cnt SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(max_entries, 32 * 32);
+ __type(key, u64);
+ __type(value, struct datarec);
+} devmap_xmit_cnt_multi SEC(".maps");
+
+const volatile int nr_cpus = 0;
+
+/* These can be set before loading so that redundant comparisons can be DCE'd by
+ * the verifier, and only actual matches are tried after loading tp_btf program.
+ * This allows sample to filter tracepoint stats based on net_device.
+ */
+const volatile int from_match[32] = {};
+const volatile int to_match[32] = {};
+
+int cpumap_map_id = 0;
+
+/* Find if b is part of set a, but if a is empty set then evaluate to true */
+#define IN_SET(a, b) \
+ ({ \
+ bool __res = !(a)[0]; \
+ for (int i = 0; i < ARRAY_SIZE(a) && (a)[i]; i++) { \
+ __res = (a)[i] == (b); \
+ if (__res) \
+ break; \
+ } \
+ __res; \
+ })
+
+static __always_inline __u32 xdp_get_err_key(int err)
+{
+ switch (err) {
+ case 0:
+ return 0;
+ case -EINVAL:
+ return 2;
+ case -ENETDOWN:
+ return 3;
+ case -EMSGSIZE:
+ return 4;
+ case -EOPNOTSUPP:
+ return 5;
+ case -ENOSPC:
+ return 6;
+ default:
+ return 1;
+ }
+}
+
+static __always_inline int xdp_redirect_collect_stat(int from, int err)
+{
+ u32 cpu = bpf_get_smp_processor_id();
+ u32 key = XDP_REDIRECT_ERROR;
+ struct datarec *rec;
+ u32 idx;
+
+ if (!IN_SET(from_match, from))
+ return 0;
+
+ key = xdp_get_err_key(err);
+
+ idx = key * nr_cpus + cpu;
+ rec = bpf_map_lookup_elem(&redir_err_cnt, &idx);
+ if (!rec)
+ return 0;
+ if (key)
+ NO_TEAR_INC(rec->dropped);
+ else
+ NO_TEAR_INC(rec->processed);
+ return 0; /* Indicate event was filtered (no further processing)*/
+ /*
+ * Returning 1 here would allow e.g. a perf-record tracepoint
+ * to see and record these events, but it doesn't work well
+ * in-practice as stopping perf-record also unload this
+ * bpf_prog. Plus, there is additional overhead of doing so.
+ */
+}
+
+SEC("tp_btf/xdp_redirect_err")
+int BPF_PROG(tp_xdp_redirect_err, const struct net_device *dev,
+ const struct bpf_prog *xdp, const void *tgt, int err,
+ const struct bpf_map *map, u32 index)
+{
+ return xdp_redirect_collect_stat(dev->ifindex, err);
+}
+
+SEC("tp_btf/xdp_redirect_map_err")
+int BPF_PROG(tp_xdp_redirect_map_err, const struct net_device *dev,
+ const struct bpf_prog *xdp, const void *tgt, int err,
+ const struct bpf_map *map, u32 index)
+{
+ return xdp_redirect_collect_stat(dev->ifindex, err);
+}
+
+SEC("tp_btf/xdp_redirect")
+int BPF_PROG(tp_xdp_redirect, const struct net_device *dev,
+ const struct bpf_prog *xdp, const void *tgt, int err,
+ const struct bpf_map *map, u32 index)
+{
+ return xdp_redirect_collect_stat(dev->ifindex, err);
+}
+
+SEC("tp_btf/xdp_redirect_map")
+int BPF_PROG(tp_xdp_redirect_map, const struct net_device *dev,
+ const struct bpf_prog *xdp, const void *tgt, int err,
+ const struct bpf_map *map, u32 index)
+{
+ return xdp_redirect_collect_stat(dev->ifindex, err);
+}
+
+SEC("tp_btf/xdp_cpumap_enqueue")
+int BPF_PROG(tp_xdp_cpumap_enqueue, int map_id, unsigned int processed,
+ unsigned int drops, int to_cpu)
+{
+ u32 cpu = bpf_get_smp_processor_id();
+ struct datarec *rec;
+ u32 idx;
+
+ if (cpumap_map_id && cpumap_map_id != map_id)
+ return 0;
+
+ idx = to_cpu * nr_cpus + cpu;
+ rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &idx);
+ if (!rec)
+ return 0;
+ NO_TEAR_ADD(rec->processed, processed);
+ NO_TEAR_ADD(rec->dropped, drops);
+ /* Record bulk events, then userspace can calc average bulk size */
+ if (processed > 0)
+ NO_TEAR_INC(rec->issue);
+ /* Inception: It's possible to detect overload situations, via
+ * this tracepoint. This can be used for creating a feedback
+ * loop to XDP, which can take appropriate actions to mitigate
+ * this overload situation.
+ */
+ return 0;
+}
+
+SEC("tp_btf/xdp_cpumap_kthread")
+int BPF_PROG(tp_xdp_cpumap_kthread, int map_id, unsigned int processed,
+ unsigned int drops, int sched, struct xdp_cpumap_stats *xdp_stats)
+{
+ struct datarec *rec;
+ u32 cpu;
+
+ if (cpumap_map_id && cpumap_map_id != map_id)
+ return 0;
+
+ cpu = bpf_get_smp_processor_id();
+ rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &cpu);
+ if (!rec)
+ return 0;
+ NO_TEAR_ADD(rec->processed, processed);
+ NO_TEAR_ADD(rec->dropped, drops);
+ NO_TEAR_ADD(rec->xdp_pass, xdp_stats->pass);
+ NO_TEAR_ADD(rec->xdp_drop, xdp_stats->drop);
+ NO_TEAR_ADD(rec->xdp_redirect, xdp_stats->redirect);
+ /* Count times kthread yielded CPU via schedule call */
+ if (sched)
+ NO_TEAR_INC(rec->issue);
+ return 0;
+}
+
+SEC("tp_btf/xdp_exception")
+int BPF_PROG(tp_xdp_exception, const struct net_device *dev,
+ const struct bpf_prog *xdp, u32 act)
+{
+ u32 cpu = bpf_get_smp_processor_id();
+ struct datarec *rec;
+ u32 key = act, idx;
+
+ if (!IN_SET(from_match, dev->ifindex))
+ return 0;
+ if (!IN_SET(to_match, dev->ifindex))
+ return 0;
+
+ if (key > XDP_REDIRECT)
+ key = XDP_REDIRECT + 1;
+
+ idx = key * nr_cpus + cpu;
+ rec = bpf_map_lookup_elem(&exception_cnt, &idx);
+ if (!rec)
+ return 0;
+ NO_TEAR_INC(rec->dropped);
+
+ return 0;
+}
+
+SEC("tp_btf/xdp_devmap_xmit")
+int BPF_PROG(tp_xdp_devmap_xmit, const struct net_device *from_dev,
+ const struct net_device *to_dev, int sent, int drops, int err)
+{
+ struct datarec *rec;
+ int idx_in, idx_out;
+ u32 cpu;
+
+ idx_in = from_dev->ifindex;
+ idx_out = to_dev->ifindex;
+
+ if (!IN_SET(from_match, idx_in))
+ return 0;
+ if (!IN_SET(to_match, idx_out))
+ return 0;
+
+ cpu = bpf_get_smp_processor_id();
+ rec = bpf_map_lookup_elem(&devmap_xmit_cnt, &cpu);
+ if (!rec)
+ return 0;
+ NO_TEAR_ADD(rec->processed, sent);
+ NO_TEAR_ADD(rec->dropped, drops);
+ /* Record bulk events, then userspace can calc average bulk size */
+ NO_TEAR_INC(rec->info);
+ /* Record error cases, where no frame were sent */
+ /* Catch API error of drv ndo_xdp_xmit sent more than count */
+ if (err || drops < 0)
+ NO_TEAR_INC(rec->issue);
+ return 0;
+}
+
+SEC("tp_btf/xdp_devmap_xmit")
+int BPF_PROG(tp_xdp_devmap_xmit_multi, const struct net_device *from_dev,
+ const struct net_device *to_dev, int sent, int drops, int err)
+{
+ struct datarec empty = {};
+ struct datarec *rec;
+ int idx_in, idx_out;
+ u64 idx;
+
+ idx_in = from_dev->ifindex;
+ idx_out = to_dev->ifindex;
+ idx = idx_in;
+ idx = idx << 32 | idx_out;
+
+ if (!IN_SET(from_match, idx_in))
+ return 0;
+ if (!IN_SET(to_match, idx_out))
+ return 0;
+
+ bpf_map_update_elem(&devmap_xmit_cnt_multi, &idx, &empty, BPF_NOEXIST);
+ rec = bpf_map_lookup_elem(&devmap_xmit_cnt_multi, &idx);
+ if (!rec)
+ return 0;
+
+ NO_TEAR_ADD(rec->processed, sent);
+ NO_TEAR_ADD(rec->dropped, drops);
+ NO_TEAR_INC(rec->info);
+ if (err || drops < 0)
+ NO_TEAR_INC(rec->issue);
+ return 0;
+}
diff --git a/samples/bpf/xdp_sample.bpf.h b/samples/bpf/xdp_sample.bpf.h
new file mode 100644
index 000000000000..25b1dbe9b37b
--- /dev/null
+++ b/samples/bpf/xdp_sample.bpf.h
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef _XDP_SAMPLE_BPF_H
+#define _XDP_SAMPLE_BPF_H
+
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+
+#include "xdp_sample_shared.h"
+
+#define ETH_ALEN 6
+#define ETH_P_802_3_MIN 0x0600
+#define ETH_P_8021Q 0x8100
+#define ETH_P_8021AD 0x88A8
+#define ETH_P_IP 0x0800
+#define ETH_P_IPV6 0x86DD
+#define ETH_P_ARP 0x0806
+#define IPPROTO_ICMPV6 58
+
+#define EINVAL 22
+#define ENETDOWN 100
+#define EMSGSIZE 90
+#define EOPNOTSUPP 95
+#define ENOSPC 28
+
+typedef struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __type(key, unsigned int);
+ __type(value, struct datarec);
+} array_map;
+
+extern array_map rx_cnt;
+extern const volatile int nr_cpus;
+
+enum {
+ XDP_REDIRECT_SUCCESS = 0,
+ XDP_REDIRECT_ERROR = 1
+};
+
+static __always_inline void swap_src_dst_mac(void *data)
+{
+ unsigned short *p = data;
+ unsigned short dst[3];
+
+ dst[0] = p[0];
+ dst[1] = p[1];
+ dst[2] = p[2];
+ p[0] = p[3];
+ p[1] = p[4];
+ p[2] = p[5];
+ p[3] = dst[0];
+ p[4] = dst[1];
+ p[5] = dst[2];
+}
+
+#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
+ __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define bpf_ntohs(x) __builtin_bswap16(x)
+#define bpf_htons(x) __builtin_bswap16(x)
+#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
+ __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define bpf_ntohs(x) (x)
+#define bpf_htons(x) (x)
+#else
+# error "Endianness detection needs to be set up for your compiler?!"
+#endif
+
+/*
+ * Note: including linux/compiler.h or linux/kernel.h for the macros below
+ * conflicts with vmlinux.h include in BPF files, so we define them here.
+ *
+ * Following functions are taken from kernel sources and
+ * break aliasing rules in their original form.
+ *
+ * While kernel is compiled with -fno-strict-aliasing,
+ * perf uses -Wstrict-aliasing=3 which makes build fail
+ * under gcc 4.4.
+ *
+ * Using extra __may_alias__ type to allow aliasing
+ * in this case.
+ */
+typedef __u8 __attribute__((__may_alias__)) __u8_alias_t;
+typedef __u16 __attribute__((__may_alias__)) __u16_alias_t;
+typedef __u32 __attribute__((__may_alias__)) __u32_alias_t;
+typedef __u64 __attribute__((__may_alias__)) __u64_alias_t;
+
+static __always_inline void __read_once_size(const volatile void *p, void *res, int size)
+{
+ switch (size) {
+ case 1: *(__u8_alias_t *) res = *(volatile __u8_alias_t *) p; break;
+ case 2: *(__u16_alias_t *) res = *(volatile __u16_alias_t *) p; break;
+ case 4: *(__u32_alias_t *) res = *(volatile __u32_alias_t *) p; break;
+ case 8: *(__u64_alias_t *) res = *(volatile __u64_alias_t *) p; break;
+ default:
+ asm volatile ("" : : : "memory");
+ __builtin_memcpy((void *)res, (const void *)p, size);
+ asm volatile ("" : : : "memory");
+ }
+}
+
+static __always_inline void __write_once_size(volatile void *p, void *res, int size)
+{
+ switch (size) {
+ case 1: *(volatile __u8_alias_t *) p = *(__u8_alias_t *) res; break;
+ case 2: *(volatile __u16_alias_t *) p = *(__u16_alias_t *) res; break;
+ case 4: *(volatile __u32_alias_t *) p = *(__u32_alias_t *) res; break;
+ case 8: *(volatile __u64_alias_t *) p = *(__u64_alias_t *) res; break;
+ default:
+ asm volatile ("" : : : "memory");
+ __builtin_memcpy((void *)p, (const void *)res, size);
+ asm volatile ("" : : : "memory");
+ }
+}
+
+#define READ_ONCE(x) \
+({ \
+ union { typeof(x) __val; char __c[1]; } __u = \
+ { .__c = { 0 } }; \
+ __read_once_size(&(x), __u.__c, sizeof(x)); \
+ __u.__val; \
+})
+
+#define WRITE_ONCE(x, val) \
+({ \
+ union { typeof(x) __val; char __c[1]; } __u = \
+ { .__val = (val) }; \
+ __write_once_size(&(x), __u.__c, sizeof(x)); \
+ __u.__val; \
+})
+
+/* Add a value using relaxed read and relaxed write. Less expensive than
+ * fetch_add when there is no write concurrency.
+ */
+#define NO_TEAR_ADD(x, val) WRITE_ONCE((x), READ_ONCE(x) + (val))
+#define NO_TEAR_INC(x) NO_TEAR_ADD((x), 1)
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+#endif
diff --git a/samples/bpf/xdp_sample_pkts_kern.c b/samples/bpf/xdp_sample_pkts_kern.c
index 33377289e2a8..9cf76b340dd7 100644
--- a/samples/bpf/xdp_sample_pkts_kern.c
+++ b/samples/bpf/xdp_sample_pkts_kern.c
@@ -5,14 +5,12 @@
#include <bpf/bpf_helpers.h>
#define SAMPLE_SIZE 64ul
-#define MAX_CPUS 128
-
-struct bpf_map_def SEC("maps") my_map = {
- .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(u32),
- .max_entries = MAX_CPUS,
-};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(u32));
+} my_map SEC(".maps");
SEC("xdp_sample")
int xdp_sample_prog(struct xdp_md *ctx)
diff --git a/samples/bpf/xdp_sample_pkts_user.c b/samples/bpf/xdp_sample_pkts_user.c
index 991ef6f0880b..7df7163239ac 100644
--- a/samples/bpf/xdp_sample_pkts_user.c
+++ b/samples/bpf/xdp_sample_pkts_user.c
@@ -12,13 +12,11 @@
#include <signal.h>
#include <bpf/libbpf.h>
#include <bpf/bpf.h>
-#include <sys/resource.h>
#include <libgen.h>
#include <linux/if_link.h>
#include "perf-sys.h"
-#define MAX_CPUS 128
static int if_idx;
static char *if_name;
static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
@@ -31,7 +29,7 @@ static int do_attach(int idx, int fd, const char *name)
__u32 info_len = sizeof(info);
int err;
- err = bpf_set_link_xdp_fd(idx, fd, xdp_flags);
+ err = bpf_xdp_attach(idx, fd, xdp_flags, NULL);
if (err < 0) {
printf("ERROR: failed to attach program to %s\n", name);
return err;
@@ -52,13 +50,13 @@ static int do_detach(int idx, const char *name)
__u32 curr_prog_id = 0;
int err = 0;
- err = bpf_get_link_xdp_id(idx, &curr_prog_id, xdp_flags);
+ err = bpf_xdp_query_id(idx, xdp_flags, &curr_prog_id);
if (err) {
- printf("bpf_get_link_xdp_id failed\n");
+ printf("bpf_xdp_query_id failed\n");
return err;
}
if (prog_id == curr_prog_id) {
- err = bpf_set_link_xdp_fd(idx, -1, xdp_flags);
+ err = bpf_xdp_detach(idx, xdp_flags, NULL);
if (err < 0)
printf("ERROR: failed to detach prog from %s\n", name);
} else if (!curr_prog_id) {
@@ -104,19 +102,16 @@ static void usage(const char *prog)
fprintf(stderr,
"%s: %s [OPTS] <ifname|ifindex>\n\n"
"OPTS:\n"
- " -F force loading prog\n",
+ " -F force loading prog\n"
+ " -S use skb-mode\n",
__func__, prog);
}
int main(int argc, char **argv)
{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
- struct bpf_prog_load_attr prog_load_attr = {
- .prog_type = BPF_PROG_TYPE_XDP,
- };
- struct perf_buffer_opts pb_opts = {};
const char *optstr = "FS";
int prog_fd, map_fd, opt;
+ struct bpf_program *prog;
struct bpf_object *obj;
struct bpf_map *map;
char filename[256];
@@ -144,23 +139,22 @@ int main(int argc, char **argv)
return 1;
}
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK)");
- return 1;
- }
-
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- prog_load_attr.file = filename;
- if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj))
return 1;
- if (!prog_fd) {
- printf("bpf_prog_load_xattr: %s\n", strerror(errno));
+ prog = bpf_object__next_program(obj, NULL);
+ bpf_program__set_type(prog, BPF_PROG_TYPE_XDP);
+
+ err = bpf_object__load(obj);
+ if (err)
return 1;
- }
- map = bpf_map__next(NULL, obj);
+ prog_fd = bpf_program__fd(prog);
+
+ map = bpf_object__next_map(obj, NULL);
if (!map) {
printf("finding a map in obj file failed\n");
return 1;
@@ -187,8 +181,7 @@ int main(int argc, char **argv)
return 1;
}
- pb_opts.sample_cb = print_bpf_output;
- pb = perf_buffer__new(map_fd, 8, &pb_opts);
+ pb = perf_buffer__new(map_fd, 8, print_bpf_output, NULL, NULL, NULL);
err = libbpf_get_error(pb);
if (err) {
perror("perf_buffer setup failed");
diff --git a/samples/bpf/xdp_sample_shared.h b/samples/bpf/xdp_sample_shared.h
new file mode 100644
index 000000000000..8a7669a5d563
--- /dev/null
+++ b/samples/bpf/xdp_sample_shared.h
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#ifndef _XDP_SAMPLE_SHARED_H
+#define _XDP_SAMPLE_SHARED_H
+
+struct datarec {
+ size_t processed;
+ size_t dropped;
+ size_t issue;
+ union {
+ size_t xdp_pass;
+ size_t info;
+ };
+ size_t xdp_drop;
+ size_t xdp_redirect;
+} __attribute__((aligned(64)));
+
+#endif
diff --git a/samples/bpf/xdp_sample_user.c b/samples/bpf/xdp_sample_user.c
new file mode 100644
index 000000000000..158682852162
--- /dev/null
+++ b/samples/bpf/xdp_sample_user.c
@@ -0,0 +1,1673 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <linux/ethtool.h>
+#include <linux/hashtable.h>
+#include <linux/if_link.h>
+#include <linux/jhash.h>
+#include <linux/limits.h>
+#include <linux/list.h>
+#include <linux/sockios.h>
+#include <locale.h>
+#include <math.h>
+#include <net/if.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/signalfd.h>
+#include <sys/sysinfo.h>
+#include <sys/timerfd.h>
+#include <sys/utsname.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "bpf_util.h"
+#include "xdp_sample_user.h"
+
+#define __sample_print(fmt, cond, ...) \
+ ({ \
+ if (cond) \
+ printf(fmt, ##__VA_ARGS__); \
+ })
+
+#define print_always(fmt, ...) __sample_print(fmt, 1, ##__VA_ARGS__)
+#define print_default(fmt, ...) \
+ __sample_print(fmt, sample_log_level & LL_DEFAULT, ##__VA_ARGS__)
+#define __print_err(err, fmt, ...) \
+ ({ \
+ __sample_print(fmt, err > 0 || sample_log_level & LL_DEFAULT, \
+ ##__VA_ARGS__); \
+ sample_err_exp = sample_err_exp ? true : err > 0; \
+ })
+#define print_err(err, fmt, ...) __print_err(err, fmt, ##__VA_ARGS__)
+
+#define __COLUMN(x) "%'10" x " %-13s"
+#define FMT_COLUMNf __COLUMN(".0f")
+#define FMT_COLUMNd __COLUMN("d")
+#define FMT_COLUMNl __COLUMN("llu")
+#define RX(rx) rx, "rx/s"
+#define PPS(pps) pps, "pkt/s"
+#define DROP(drop) drop, "drop/s"
+#define ERR(err) err, "error/s"
+#define HITS(hits) hits, "hit/s"
+#define XMIT(xmit) xmit, "xmit/s"
+#define PASS(pass) pass, "pass/s"
+#define REDIR(redir) redir, "redir/s"
+#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
+
+#define XDP_UNKNOWN (XDP_REDIRECT + 1)
+#define XDP_ACTION_MAX (XDP_UNKNOWN + 1)
+#define XDP_REDIRECT_ERR_MAX 7
+
+enum map_type {
+ MAP_RX,
+ MAP_REDIRECT_ERR,
+ MAP_CPUMAP_ENQUEUE,
+ MAP_CPUMAP_KTHREAD,
+ MAP_EXCEPTION,
+ MAP_DEVMAP_XMIT,
+ MAP_DEVMAP_XMIT_MULTI,
+ NUM_MAP,
+};
+
+enum log_level {
+ LL_DEFAULT = 1U << 0,
+ LL_SIMPLE = 1U << 1,
+ LL_DEBUG = 1U << 2,
+};
+
+struct record {
+ __u64 timestamp;
+ struct datarec total;
+ struct datarec *cpu;
+};
+
+struct map_entry {
+ struct hlist_node node;
+ __u64 pair;
+ struct record val;
+};
+
+struct stats_record {
+ struct record rx_cnt;
+ struct record redir_err[XDP_REDIRECT_ERR_MAX];
+ struct record kthread;
+ struct record exception[XDP_ACTION_MAX];
+ struct record devmap_xmit;
+ DECLARE_HASHTABLE(xmit_map, 5);
+ struct record enq[];
+};
+
+struct sample_output {
+ struct {
+ __u64 rx;
+ __u64 redir;
+ __u64 drop;
+ __u64 drop_xmit;
+ __u64 err;
+ __u64 xmit;
+ } totals;
+ struct {
+ union {
+ __u64 pps;
+ __u64 num;
+ };
+ __u64 drop;
+ __u64 err;
+ } rx_cnt;
+ struct {
+ __u64 suc;
+ __u64 err;
+ } redir_cnt;
+ struct {
+ __u64 hits;
+ } except_cnt;
+ struct {
+ __u64 pps;
+ __u64 drop;
+ __u64 err;
+ double bavg;
+ } xmit_cnt;
+};
+
+struct xdp_desc {
+ int ifindex;
+ __u32 prog_id;
+ int flags;
+} sample_xdp_progs[32];
+
+struct datarec *sample_mmap[NUM_MAP];
+struct bpf_map *sample_map[NUM_MAP];
+size_t sample_map_count[NUM_MAP];
+enum log_level sample_log_level;
+struct sample_output sample_out;
+unsigned long sample_interval;
+bool sample_err_exp;
+int sample_xdp_cnt;
+int sample_n_cpus;
+int sample_sig_fd;
+int sample_mask;
+
+static const char *xdp_redirect_err_names[XDP_REDIRECT_ERR_MAX] = {
+ /* Key=1 keeps unknown errors */
+ "Success",
+ "Unknown",
+ "EINVAL",
+ "ENETDOWN",
+ "EMSGSIZE",
+ "EOPNOTSUPP",
+ "ENOSPC",
+};
+
+/* Keyed from Unknown */
+static const char *xdp_redirect_err_help[XDP_REDIRECT_ERR_MAX - 1] = {
+ "Unknown error",
+ "Invalid redirection",
+ "Device being redirected to is down",
+ "Packet length too large for device",
+ "Operation not supported",
+ "No space in ptr_ring of cpumap kthread",
+};
+
+static const char *xdp_action_names[XDP_ACTION_MAX] = {
+ [XDP_ABORTED] = "XDP_ABORTED",
+ [XDP_DROP] = "XDP_DROP",
+ [XDP_PASS] = "XDP_PASS",
+ [XDP_TX] = "XDP_TX",
+ [XDP_REDIRECT] = "XDP_REDIRECT",
+ [XDP_UNKNOWN] = "XDP_UNKNOWN",
+};
+
+static __u64 gettime(void)
+{
+ struct timespec t;
+ int res;
+
+ res = clock_gettime(CLOCK_MONOTONIC, &t);
+ if (res < 0) {
+ fprintf(stderr, "Error with gettimeofday! (%i)\n", res);
+ return UINT64_MAX;
+ }
+ return (__u64)t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
+}
+
+static const char *action2str(int action)
+{
+ if (action < XDP_ACTION_MAX)
+ return xdp_action_names[action];
+ return NULL;
+}
+
+static void sample_print_help(int mask)
+{
+ printf("Output format description\n\n"
+ "By default, redirect success statistics are disabled, use -s to enable.\n"
+ "The terse output mode is default, verbose mode can be activated using -v\n"
+ "Use SIGQUIT (Ctrl + \\) to switch the mode dynamically at runtime\n\n"
+ "Terse mode displays at most the following fields:\n"
+ " rx/s Number of packets received per second\n"
+ " redir/s Number of packets successfully redirected per second\n"
+ " err,drop/s Aggregated count of errors per second (including dropped packets)\n"
+ " xmit/s Number of packets transmitted on the output device per second\n\n"
+ "Output description for verbose mode:\n"
+ " FIELD DESCRIPTION\n");
+
+ if (mask & SAMPLE_RX_CNT) {
+ printf(" receive\t\tDisplays the number of packets received & errors encountered\n"
+ " \t\t\tWhenever an error or packet drop occurs, details of per CPU error\n"
+ " \t\t\tand drop statistics will be expanded inline in terse mode.\n"
+ " \t\t\t\tpkt/s - Packets received per second\n"
+ " \t\t\t\tdrop/s - Packets dropped per second\n"
+ " \t\t\t\terror/s - Errors encountered per second\n\n");
+ }
+ if (mask & (SAMPLE_REDIRECT_CNT | SAMPLE_REDIRECT_ERR_CNT)) {
+ printf(" redirect\t\tDisplays the number of packets successfully redirected\n"
+ " \t\t\tErrors encountered are expanded under redirect_err field\n"
+ " \t\t\tNote that passing -s to enable it has a per packet overhead\n"
+ " \t\t\t\tredir/s - Packets redirected successfully per second\n\n"
+ " redirect_err\t\tDisplays the number of packets that failed redirection\n"
+ " \t\t\tThe errno is expanded under this field with per CPU count\n"
+ " \t\t\tThe recognized errors are:\n");
+
+ for (int i = 2; i < XDP_REDIRECT_ERR_MAX; i++)
+ printf("\t\t\t %s: %s\n", xdp_redirect_err_names[i],
+ xdp_redirect_err_help[i - 1]);
+
+ printf(" \n\t\t\t\terror/s - Packets that failed redirection per second\n\n");
+ }
+
+ if (mask & SAMPLE_CPUMAP_ENQUEUE_CNT) {
+ printf(" enqueue to cpu N\tDisplays the number of packets enqueued to bulk queue of CPU N\n"
+ " \t\t\tExpands to cpu:FROM->N to display enqueue stats for each CPU enqueuing to CPU N\n"
+ " \t\t\tReceived packets can be associated with the CPU redirect program is enqueuing \n"
+ " \t\t\tpackets to.\n"
+ " \t\t\t\tpkt/s - Packets enqueued per second from other CPU to CPU N\n"
+ " \t\t\t\tdrop/s - Packets dropped when trying to enqueue to CPU N\n"
+ " \t\t\t\tbulk-avg - Average number of packets processed for each event\n\n");
+ }
+
+ if (mask & SAMPLE_CPUMAP_KTHREAD_CNT) {
+ printf(" kthread\t\tDisplays the number of packets processed in CPUMAP kthread for each CPU\n"
+ " \t\t\tPackets consumed from ptr_ring in kthread, and its xdp_stats (after calling \n"
+ " \t\t\tCPUMAP bpf prog) are expanded below this. xdp_stats are expanded as a total and\n"
+ " \t\t\tthen per-CPU to associate it to each CPU's pinned CPUMAP kthread.\n"
+ " \t\t\t\tpkt/s - Packets consumed per second from ptr_ring\n"
+ " \t\t\t\tdrop/s - Packets dropped per second in kthread\n"
+ " \t\t\t\tsched - Number of times kthread called schedule()\n\n"
+ " \t\t\txdp_stats (also expands to per-CPU counts)\n"
+ " \t\t\t\tpass/s - XDP_PASS count for CPUMAP program execution\n"
+ " \t\t\t\tdrop/s - XDP_DROP count for CPUMAP program execution\n"
+ " \t\t\t\tredir/s - XDP_REDIRECT count for CPUMAP program execution\n\n");
+ }
+
+ if (mask & SAMPLE_EXCEPTION_CNT) {
+ printf(" xdp_exception\t\tDisplays xdp_exception tracepoint events\n"
+ " \t\t\tThis can occur due to internal driver errors, unrecognized\n"
+ " \t\t\tXDP actions and due to explicit user trigger by use of XDP_ABORTED\n"
+ " \t\t\tEach action is expanded below this field with its count\n"
+ " \t\t\t\thit/s - Number of times the tracepoint was hit per second\n\n");
+ }
+
+ if (mask & SAMPLE_DEVMAP_XMIT_CNT) {
+ printf(" devmap_xmit\t\tDisplays devmap_xmit tracepoint events\n"
+ " \t\t\tThis tracepoint is invoked for successful transmissions on output\n"
+ " \t\t\tdevice but these statistics are not available for generic XDP mode,\n"
+ " \t\t\thence they will be omitted from the output when using SKB mode\n"
+ " \t\t\t\txmit/s - Number of packets that were transmitted per second\n"
+ " \t\t\t\tdrop/s - Number of packets that failed transmissions per second\n"
+ " \t\t\t\tdrv_err/s - Number of internal driver errors per second\n"
+ " \t\t\t\tbulk-avg - Average number of packets processed for each event\n\n");
+ }
+}
+
+void sample_usage(char *argv[], const struct option *long_options,
+ const char *doc, int mask, bool error)
+{
+ int i;
+
+ if (!error)
+ sample_print_help(mask);
+
+ printf("\n%s\nOption for %s:\n", doc, argv[0]);
+ for (i = 0; long_options[i].name != 0; i++) {
+ printf(" --%-15s", long_options[i].name);
+ if (long_options[i].flag != NULL)
+ printf(" flag (internal value: %d)",
+ *long_options[i].flag);
+ else
+ printf("\t short-option: -%c", long_options[i].val);
+ printf("\n");
+ }
+ printf("\n");
+}
+
+static struct datarec *alloc_record_per_cpu(void)
+{
+ unsigned int nr_cpus = libbpf_num_possible_cpus();
+ struct datarec *array;
+
+ array = calloc(nr_cpus, sizeof(*array));
+ if (!array) {
+ fprintf(stderr, "Failed to allocate memory (nr_cpus: %u)\n",
+ nr_cpus);
+ return NULL;
+ }
+ return array;
+}
+
+static int map_entry_init(struct map_entry *e, __u64 pair)
+{
+ e->pair = pair;
+ INIT_HLIST_NODE(&e->node);
+ e->val.timestamp = gettime();
+ e->val.cpu = alloc_record_per_cpu();
+ if (!e->val.cpu)
+ return -ENOMEM;
+ return 0;
+}
+
+static void map_collect_percpu(struct datarec *values, struct record *rec)
+{
+ /* For percpu maps, userspace gets a value per possible CPU */
+ unsigned int nr_cpus = libbpf_num_possible_cpus();
+ __u64 sum_xdp_redirect = 0;
+ __u64 sum_processed = 0;
+ __u64 sum_xdp_pass = 0;
+ __u64 sum_xdp_drop = 0;
+ __u64 sum_dropped = 0;
+ __u64 sum_issue = 0;
+ int i;
+
+ /* Get time as close as possible to reading map contents */
+ rec->timestamp = gettime();
+
+ /* Record and sum values from each CPU */
+ for (i = 0; i < nr_cpus; i++) {
+ rec->cpu[i].processed = READ_ONCE(values[i].processed);
+ rec->cpu[i].dropped = READ_ONCE(values[i].dropped);
+ rec->cpu[i].issue = READ_ONCE(values[i].issue);
+ rec->cpu[i].xdp_pass = READ_ONCE(values[i].xdp_pass);
+ rec->cpu[i].xdp_drop = READ_ONCE(values[i].xdp_drop);
+ rec->cpu[i].xdp_redirect = READ_ONCE(values[i].xdp_redirect);
+
+ sum_processed += rec->cpu[i].processed;
+ sum_dropped += rec->cpu[i].dropped;
+ sum_issue += rec->cpu[i].issue;
+ sum_xdp_pass += rec->cpu[i].xdp_pass;
+ sum_xdp_drop += rec->cpu[i].xdp_drop;
+ sum_xdp_redirect += rec->cpu[i].xdp_redirect;
+ }
+
+ rec->total.processed = sum_processed;
+ rec->total.dropped = sum_dropped;
+ rec->total.issue = sum_issue;
+ rec->total.xdp_pass = sum_xdp_pass;
+ rec->total.xdp_drop = sum_xdp_drop;
+ rec->total.xdp_redirect = sum_xdp_redirect;
+}
+
+static int map_collect_percpu_devmap(int map_fd, struct stats_record *rec)
+{
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ __u32 batch, count = 32;
+ struct datarec *values;
+ bool init = false;
+ __u64 *keys;
+ int i, ret;
+
+ keys = calloc(count, sizeof(__u64));
+ if (!keys)
+ return -ENOMEM;
+ values = calloc(count * nr_cpus, sizeof(struct datarec));
+ if (!values) {
+ free(keys);
+ return -ENOMEM;
+ }
+
+ for (;;) {
+ bool exit = false;
+
+ ret = bpf_map_lookup_batch(map_fd, init ? &batch : NULL, &batch,
+ keys, values, &count, NULL);
+ if (ret < 0 && errno != ENOENT)
+ break;
+ if (errno == ENOENT)
+ exit = true;
+
+ init = true;
+ for (i = 0; i < count; i++) {
+ struct map_entry *e, *x = NULL;
+ __u64 pair = keys[i];
+ struct datarec *arr;
+
+ arr = &values[i * nr_cpus];
+ hash_for_each_possible(rec->xmit_map, e, node, pair) {
+ if (e->pair == pair) {
+ x = e;
+ break;
+ }
+ }
+ if (!x) {
+ x = calloc(1, sizeof(*x));
+ if (!x)
+ goto cleanup;
+ if (map_entry_init(x, pair) < 0) {
+ free(x);
+ goto cleanup;
+ }
+ hash_add(rec->xmit_map, &x->node, pair);
+ }
+ map_collect_percpu(arr, &x->val);
+ }
+
+ if (exit)
+ break;
+ count = 32;
+ }
+
+ free(values);
+ free(keys);
+ return 0;
+cleanup:
+ free(values);
+ free(keys);
+ return -ENOMEM;
+}
+
+static struct stats_record *alloc_stats_record(void)
+{
+ struct stats_record *rec;
+ int i;
+
+ rec = calloc(1, sizeof(*rec) + sample_n_cpus * sizeof(struct record));
+ if (!rec) {
+ fprintf(stderr, "Failed to allocate memory\n");
+ return NULL;
+ }
+
+ if (sample_mask & SAMPLE_RX_CNT) {
+ rec->rx_cnt.cpu = alloc_record_per_cpu();
+ if (!rec->rx_cnt.cpu) {
+ fprintf(stderr,
+ "Failed to allocate rx_cnt per-CPU array\n");
+ goto end_rec;
+ }
+ }
+ if (sample_mask & (SAMPLE_REDIRECT_CNT | SAMPLE_REDIRECT_ERR_CNT)) {
+ for (i = 0; i < XDP_REDIRECT_ERR_MAX; i++) {
+ rec->redir_err[i].cpu = alloc_record_per_cpu();
+ if (!rec->redir_err[i].cpu) {
+ fprintf(stderr,
+ "Failed to allocate redir_err per-CPU array for "
+ "\"%s\" case\n",
+ xdp_redirect_err_names[i]);
+ while (i--)
+ free(rec->redir_err[i].cpu);
+ goto end_rx_cnt;
+ }
+ }
+ }
+ if (sample_mask & SAMPLE_CPUMAP_KTHREAD_CNT) {
+ rec->kthread.cpu = alloc_record_per_cpu();
+ if (!rec->kthread.cpu) {
+ fprintf(stderr,
+ "Failed to allocate kthread per-CPU array\n");
+ goto end_redir;
+ }
+ }
+ if (sample_mask & SAMPLE_EXCEPTION_CNT) {
+ for (i = 0; i < XDP_ACTION_MAX; i++) {
+ rec->exception[i].cpu = alloc_record_per_cpu();
+ if (!rec->exception[i].cpu) {
+ fprintf(stderr,
+ "Failed to allocate exception per-CPU array for "
+ "\"%s\" case\n",
+ action2str(i));
+ while (i--)
+ free(rec->exception[i].cpu);
+ goto end_kthread;
+ }
+ }
+ }
+ if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT) {
+ rec->devmap_xmit.cpu = alloc_record_per_cpu();
+ if (!rec->devmap_xmit.cpu) {
+ fprintf(stderr,
+ "Failed to allocate devmap_xmit per-CPU array\n");
+ goto end_exception;
+ }
+ }
+ if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI)
+ hash_init(rec->xmit_map);
+ if (sample_mask & SAMPLE_CPUMAP_ENQUEUE_CNT) {
+ for (i = 0; i < sample_n_cpus; i++) {
+ rec->enq[i].cpu = alloc_record_per_cpu();
+ if (!rec->enq[i].cpu) {
+ fprintf(stderr,
+ "Failed to allocate enqueue per-CPU array for "
+ "CPU %d\n",
+ i);
+ while (i--)
+ free(rec->enq[i].cpu);
+ goto end_devmap_xmit;
+ }
+ }
+ }
+
+ return rec;
+
+end_devmap_xmit:
+ free(rec->devmap_xmit.cpu);
+end_exception:
+ for (i = 0; i < XDP_ACTION_MAX; i++)
+ free(rec->exception[i].cpu);
+end_kthread:
+ free(rec->kthread.cpu);
+end_redir:
+ for (i = 0; i < XDP_REDIRECT_ERR_MAX; i++)
+ free(rec->redir_err[i].cpu);
+end_rx_cnt:
+ free(rec->rx_cnt.cpu);
+end_rec:
+ free(rec);
+ return NULL;
+}
+
+static void free_stats_record(struct stats_record *r)
+{
+ struct hlist_node *tmp;
+ struct map_entry *e;
+ int i;
+
+ for (i = 0; i < sample_n_cpus; i++)
+ free(r->enq[i].cpu);
+ hash_for_each_safe(r->xmit_map, i, tmp, e, node) {
+ hash_del(&e->node);
+ free(e->val.cpu);
+ free(e);
+ }
+ free(r->devmap_xmit.cpu);
+ for (i = 0; i < XDP_ACTION_MAX; i++)
+ free(r->exception[i].cpu);
+ free(r->kthread.cpu);
+ for (i = 0; i < XDP_REDIRECT_ERR_MAX; i++)
+ free(r->redir_err[i].cpu);
+ free(r->rx_cnt.cpu);
+ free(r);
+}
+
+static double calc_period(struct record *r, struct record *p)
+{
+ double period_ = 0;
+ __u64 period = 0;
+
+ period = r->timestamp - p->timestamp;
+ if (period > 0)
+ period_ = ((double)period / NANOSEC_PER_SEC);
+
+ return period_;
+}
+
+static double sample_round(double val)
+{
+ if (val - floor(val) < 0.5)
+ return floor(val);
+ return ceil(val);
+}
+
+static __u64 calc_pps(struct datarec *r, struct datarec *p, double period_)
+{
+ __u64 packets = 0;
+ __u64 pps = 0;
+
+ if (period_ > 0) {
+ packets = r->processed - p->processed;
+ pps = sample_round(packets / period_);
+ }
+ return pps;
+}
+
+static __u64 calc_drop_pps(struct datarec *r, struct datarec *p, double period_)
+{
+ __u64 packets = 0;
+ __u64 pps = 0;
+
+ if (period_ > 0) {
+ packets = r->dropped - p->dropped;
+ pps = sample_round(packets / period_);
+ }
+ return pps;
+}
+
+static __u64 calc_errs_pps(struct datarec *r, struct datarec *p, double period_)
+{
+ __u64 packets = 0;
+ __u64 pps = 0;
+
+ if (period_ > 0) {
+ packets = r->issue - p->issue;
+ pps = sample_round(packets / period_);
+ }
+ return pps;
+}
+
+static __u64 calc_info_pps(struct datarec *r, struct datarec *p, double period_)
+{
+ __u64 packets = 0;
+ __u64 pps = 0;
+
+ if (period_ > 0) {
+ packets = r->info - p->info;
+ pps = sample_round(packets / period_);
+ }
+ return pps;
+}
+
+static void calc_xdp_pps(struct datarec *r, struct datarec *p, double *xdp_pass,
+ double *xdp_drop, double *xdp_redirect, double period_)
+{
+ *xdp_pass = 0, *xdp_drop = 0, *xdp_redirect = 0;
+ if (period_ > 0) {
+ *xdp_redirect = (r->xdp_redirect - p->xdp_redirect) / period_;
+ *xdp_pass = (r->xdp_pass - p->xdp_pass) / period_;
+ *xdp_drop = (r->xdp_drop - p->xdp_drop) / period_;
+ }
+}
+
+static void stats_get_rx_cnt(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ unsigned int nr_cpus, struct sample_output *out)
+{
+ struct record *rec, *prev;
+ double t, pps, drop, err;
+ int i;
+
+ rec = &stats_rec->rx_cnt;
+ prev = &stats_prev->rx_cnt;
+ t = calc_period(rec, prev);
+
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+ char str[64];
+
+ pps = calc_pps(r, p, t);
+ drop = calc_drop_pps(r, p, t);
+ err = calc_errs_pps(r, p, t);
+ if (!pps && !drop && !err)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d", i);
+ print_default(" %-18s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
+ "\n",
+ str, PPS(pps), DROP(drop), ERR(err));
+ }
+
+ if (out) {
+ pps = calc_pps(&rec->total, &prev->total, t);
+ drop = calc_drop_pps(&rec->total, &prev->total, t);
+ err = calc_errs_pps(&rec->total, &prev->total, t);
+
+ out->rx_cnt.pps = pps;
+ out->rx_cnt.drop = drop;
+ out->rx_cnt.err = err;
+ out->totals.rx += pps;
+ out->totals.drop += drop;
+ out->totals.err += err;
+ }
+}
+
+static void stats_get_cpumap_enqueue(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ unsigned int nr_cpus)
+{
+ struct record *rec, *prev;
+ double t, pps, drop, err;
+ int i, to_cpu;
+
+ /* cpumap enqueue stats */
+ for (to_cpu = 0; to_cpu < sample_n_cpus; to_cpu++) {
+ rec = &stats_rec->enq[to_cpu];
+ prev = &stats_prev->enq[to_cpu];
+ t = calc_period(rec, prev);
+
+ pps = calc_pps(&rec->total, &prev->total, t);
+ drop = calc_drop_pps(&rec->total, &prev->total, t);
+ err = calc_errs_pps(&rec->total, &prev->total, t);
+
+ if (pps > 0 || drop > 0) {
+ char str[64];
+
+ snprintf(str, sizeof(str), "enqueue to cpu %d", to_cpu);
+
+ if (err > 0)
+ err = pps / err; /* calc average bulk size */
+
+ print_err(drop,
+ " %-20s " FMT_COLUMNf FMT_COLUMNf __COLUMN(
+ ".2f") "\n",
+ str, PPS(pps), DROP(drop), err, "bulk-avg");
+ }
+
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+ char str[64];
+
+ pps = calc_pps(r, p, t);
+ drop = calc_drop_pps(r, p, t);
+ err = calc_errs_pps(r, p, t);
+ if (!pps && !drop && !err)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d->%d", i, to_cpu);
+ if (err > 0)
+ err = pps / err; /* calc average bulk size */
+ print_default(
+ " %-18s " FMT_COLUMNf FMT_COLUMNf __COLUMN(
+ ".2f") "\n",
+ str, PPS(pps), DROP(drop), err, "bulk-avg");
+ }
+ }
+}
+
+static void stats_get_cpumap_remote(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ unsigned int nr_cpus)
+{
+ double xdp_pass, xdp_drop, xdp_redirect;
+ struct record *rec, *prev;
+ double t;
+ int i;
+
+ rec = &stats_rec->kthread;
+ prev = &stats_prev->kthread;
+ t = calc_period(rec, prev);
+
+ calc_xdp_pps(&rec->total, &prev->total, &xdp_pass, &xdp_drop,
+ &xdp_redirect, t);
+ if (xdp_pass || xdp_drop || xdp_redirect) {
+ print_err(xdp_drop,
+ " %-18s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf "\n",
+ "xdp_stats", PASS(xdp_pass), DROP(xdp_drop),
+ REDIR(xdp_redirect));
+ }
+
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+ char str[64];
+
+ calc_xdp_pps(r, p, &xdp_pass, &xdp_drop, &xdp_redirect, t);
+ if (!xdp_pass && !xdp_drop && !xdp_redirect)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d", i);
+ print_default(" %-16s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
+ "\n",
+ str, PASS(xdp_pass), DROP(xdp_drop),
+ REDIR(xdp_redirect));
+ }
+}
+
+static void stats_get_cpumap_kthread(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ unsigned int nr_cpus)
+{
+ struct record *rec, *prev;
+ double t, pps, drop, err;
+ int i;
+
+ rec = &stats_rec->kthread;
+ prev = &stats_prev->kthread;
+ t = calc_period(rec, prev);
+
+ pps = calc_pps(&rec->total, &prev->total, t);
+ drop = calc_drop_pps(&rec->total, &prev->total, t);
+ err = calc_errs_pps(&rec->total, &prev->total, t);
+
+ print_err(drop, " %-20s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf "\n",
+ pps ? "kthread total" : "kthread", PPS(pps), DROP(drop), err,
+ "sched");
+
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+ char str[64];
+
+ pps = calc_pps(r, p, t);
+ drop = calc_drop_pps(r, p, t);
+ err = calc_errs_pps(r, p, t);
+ if (!pps && !drop && !err)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d", i);
+ print_default(" %-18s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
+ "\n",
+ str, PPS(pps), DROP(drop), err, "sched");
+ }
+}
+
+static void stats_get_redirect_cnt(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ unsigned int nr_cpus,
+ struct sample_output *out)
+{
+ struct record *rec, *prev;
+ double t, pps;
+ int i;
+
+ rec = &stats_rec->redir_err[0];
+ prev = &stats_prev->redir_err[0];
+ t = calc_period(rec, prev);
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+ char str[64];
+
+ pps = calc_pps(r, p, t);
+ if (!pps)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d", i);
+ print_default(" %-18s " FMT_COLUMNf "\n", str, REDIR(pps));
+ }
+
+ if (out) {
+ pps = calc_pps(&rec->total, &prev->total, t);
+ out->redir_cnt.suc = pps;
+ out->totals.redir += pps;
+ }
+}
+
+static void stats_get_redirect_err_cnt(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ unsigned int nr_cpus,
+ struct sample_output *out)
+{
+ struct record *rec, *prev;
+ double t, drop, sum = 0;
+ int rec_i, i;
+
+ for (rec_i = 1; rec_i < XDP_REDIRECT_ERR_MAX; rec_i++) {
+ char str[64];
+
+ rec = &stats_rec->redir_err[rec_i];
+ prev = &stats_prev->redir_err[rec_i];
+ t = calc_period(rec, prev);
+
+ drop = calc_drop_pps(&rec->total, &prev->total, t);
+ if (drop > 0 && !out) {
+ snprintf(str, sizeof(str),
+ sample_log_level & LL_DEFAULT ? "%s total" :
+ "%s",
+ xdp_redirect_err_names[rec_i]);
+ print_err(drop, " %-18s " FMT_COLUMNf "\n", str,
+ ERR(drop));
+ }
+
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+ double drop;
+
+ drop = calc_drop_pps(r, p, t);
+ if (!drop)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d", i);
+ print_default(" %-16s" FMT_COLUMNf "\n", str,
+ ERR(drop));
+ }
+
+ sum += drop;
+ }
+
+ if (out) {
+ out->redir_cnt.err = sum;
+ out->totals.err += sum;
+ }
+}
+
+static void stats_get_exception_cnt(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ unsigned int nr_cpus,
+ struct sample_output *out)
+{
+ double t, drop, sum = 0;
+ struct record *rec, *prev;
+ int rec_i, i;
+
+ for (rec_i = 0; rec_i < XDP_ACTION_MAX; rec_i++) {
+ rec = &stats_rec->exception[rec_i];
+ prev = &stats_prev->exception[rec_i];
+ t = calc_period(rec, prev);
+
+ drop = calc_drop_pps(&rec->total, &prev->total, t);
+ /* Fold out errors after heading */
+ sum += drop;
+
+ if (drop > 0 && !out) {
+ print_always(" %-18s " FMT_COLUMNf "\n",
+ action2str(rec_i), ERR(drop));
+
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+ char str[64];
+ double drop;
+
+ drop = calc_drop_pps(r, p, t);
+ if (!drop)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d", i);
+ print_default(" %-16s" FMT_COLUMNf "\n",
+ str, ERR(drop));
+ }
+ }
+ }
+
+ if (out) {
+ out->except_cnt.hits = sum;
+ out->totals.err += sum;
+ }
+}
+
+static void stats_get_devmap_xmit(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ unsigned int nr_cpus,
+ struct sample_output *out)
+{
+ double pps, drop, info, err;
+ struct record *rec, *prev;
+ double t;
+ int i;
+
+ rec = &stats_rec->devmap_xmit;
+ prev = &stats_prev->devmap_xmit;
+ t = calc_period(rec, prev);
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+ char str[64];
+
+ pps = calc_pps(r, p, t);
+ drop = calc_drop_pps(r, p, t);
+ err = calc_errs_pps(r, p, t);
+
+ if (!pps && !drop && !err)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d", i);
+ info = calc_info_pps(r, p, t);
+ if (info > 0)
+ info = (pps + drop) / info; /* calc avg bulk */
+ print_default(" %-18s" FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
+ __COLUMN(".2f") "\n",
+ str, XMIT(pps), DROP(drop), err, "drv_err/s",
+ info, "bulk-avg");
+ }
+ if (out) {
+ pps = calc_pps(&rec->total, &prev->total, t);
+ drop = calc_drop_pps(&rec->total, &prev->total, t);
+ info = calc_info_pps(&rec->total, &prev->total, t);
+ if (info > 0)
+ info = (pps + drop) / info; /* calc avg bulk */
+ err = calc_errs_pps(&rec->total, &prev->total, t);
+
+ out->xmit_cnt.pps = pps;
+ out->xmit_cnt.drop = drop;
+ out->xmit_cnt.bavg = info;
+ out->xmit_cnt.err = err;
+ out->totals.xmit += pps;
+ out->totals.drop_xmit += drop;
+ out->totals.err += err;
+ }
+}
+
+static void stats_get_devmap_xmit_multi(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ unsigned int nr_cpus,
+ struct sample_output *out,
+ bool xmit_total)
+{
+ double pps, drop, info, err;
+ struct map_entry *entry;
+ struct record *r, *p;
+ double t;
+ int bkt;
+
+ hash_for_each(stats_rec->xmit_map, bkt, entry, node) {
+ struct map_entry *e, *x = NULL;
+ char ifname_from[IFNAMSIZ];
+ char ifname_to[IFNAMSIZ];
+ const char *fstr, *tstr;
+ unsigned long prev_time;
+ struct record beg = {};
+ __u32 from_idx, to_idx;
+ char str[128];
+ __u64 pair;
+ int i;
+
+ prev_time = sample_interval * NANOSEC_PER_SEC;
+
+ pair = entry->pair;
+ from_idx = pair >> 32;
+ to_idx = pair & 0xFFFFFFFF;
+
+ r = &entry->val;
+ beg.timestamp = r->timestamp - prev_time;
+
+ /* Find matching entry from stats_prev map */
+ hash_for_each_possible(stats_prev->xmit_map, e, node, pair) {
+ if (e->pair == pair) {
+ x = e;
+ break;
+ }
+ }
+ if (x)
+ p = &x->val;
+ else
+ p = &beg;
+ t = calc_period(r, p);
+ pps = calc_pps(&r->total, &p->total, t);
+ drop = calc_drop_pps(&r->total, &p->total, t);
+ info = calc_info_pps(&r->total, &p->total, t);
+ if (info > 0)
+ info = (pps + drop) / info; /* calc avg bulk */
+ err = calc_errs_pps(&r->total, &p->total, t);
+
+ if (out) {
+ /* We are responsible for filling out totals */
+ out->totals.xmit += pps;
+ out->totals.drop_xmit += drop;
+ out->totals.err += err;
+ continue;
+ }
+
+ fstr = tstr = NULL;
+ if (if_indextoname(from_idx, ifname_from))
+ fstr = ifname_from;
+ if (if_indextoname(to_idx, ifname_to))
+ tstr = ifname_to;
+
+ snprintf(str, sizeof(str), "xmit %s->%s", fstr ?: "?",
+ tstr ?: "?");
+ /* Skip idle streams of redirection */
+ if (pps || drop || err) {
+ print_err(drop,
+ " %-20s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
+ __COLUMN(".2f") "\n", str, XMIT(pps), DROP(drop),
+ err, "drv_err/s", info, "bulk-avg");
+ }
+
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *rc = &r->cpu[i];
+ struct datarec *pc, p_beg = {};
+ char str[64];
+
+ pc = p == &beg ? &p_beg : &p->cpu[i];
+
+ pps = calc_pps(rc, pc, t);
+ drop = calc_drop_pps(rc, pc, t);
+ err = calc_errs_pps(rc, pc, t);
+
+ if (!pps && !drop && !err)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d", i);
+ info = calc_info_pps(rc, pc, t);
+ if (info > 0)
+ info = (pps + drop) / info; /* calc avg bulk */
+
+ print_default(" %-18s" FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
+ __COLUMN(".2f") "\n", str, XMIT(pps),
+ DROP(drop), err, "drv_err/s", info, "bulk-avg");
+ }
+ }
+}
+
+static void stats_print(const char *prefix, int mask, struct stats_record *r,
+ struct stats_record *p, struct sample_output *out)
+{
+ int nr_cpus = libbpf_num_possible_cpus();
+ const char *str;
+
+ print_always("%-23s", prefix ?: "Summary");
+ if (mask & SAMPLE_RX_CNT)
+ print_always(FMT_COLUMNl, RX(out->totals.rx));
+ if (mask & SAMPLE_REDIRECT_CNT)
+ print_always(FMT_COLUMNl, REDIR(out->totals.redir));
+ printf(FMT_COLUMNl,
+ out->totals.err + out->totals.drop + out->totals.drop_xmit,
+ "err,drop/s");
+ if (mask & SAMPLE_DEVMAP_XMIT_CNT ||
+ mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI)
+ printf(FMT_COLUMNl, XMIT(out->totals.xmit));
+ printf("\n");
+
+ if (mask & SAMPLE_RX_CNT) {
+ str = (sample_log_level & LL_DEFAULT) && out->rx_cnt.pps ?
+ "receive total" :
+ "receive";
+ print_err((out->rx_cnt.err || out->rx_cnt.drop),
+ " %-20s " FMT_COLUMNl FMT_COLUMNl FMT_COLUMNl "\n",
+ str, PPS(out->rx_cnt.pps), DROP(out->rx_cnt.drop),
+ ERR(out->rx_cnt.err));
+
+ stats_get_rx_cnt(r, p, nr_cpus, NULL);
+ }
+
+ if (mask & SAMPLE_CPUMAP_ENQUEUE_CNT)
+ stats_get_cpumap_enqueue(r, p, nr_cpus);
+
+ if (mask & SAMPLE_CPUMAP_KTHREAD_CNT) {
+ stats_get_cpumap_kthread(r, p, nr_cpus);
+ stats_get_cpumap_remote(r, p, nr_cpus);
+ }
+
+ if (mask & SAMPLE_REDIRECT_CNT) {
+ str = out->redir_cnt.suc ? "redirect total" : "redirect";
+ print_default(" %-20s " FMT_COLUMNl "\n", str,
+ REDIR(out->redir_cnt.suc));
+
+ stats_get_redirect_cnt(r, p, nr_cpus, NULL);
+ }
+
+ if (mask & SAMPLE_REDIRECT_ERR_CNT) {
+ str = (sample_log_level & LL_DEFAULT) && out->redir_cnt.err ?
+ "redirect_err total" :
+ "redirect_err";
+ print_err(out->redir_cnt.err, " %-20s " FMT_COLUMNl "\n", str,
+ ERR(out->redir_cnt.err));
+
+ stats_get_redirect_err_cnt(r, p, nr_cpus, NULL);
+ }
+
+ if (mask & SAMPLE_EXCEPTION_CNT) {
+ str = out->except_cnt.hits ? "xdp_exception total" :
+ "xdp_exception";
+
+ print_err(out->except_cnt.hits, " %-20s " FMT_COLUMNl "\n", str,
+ HITS(out->except_cnt.hits));
+
+ stats_get_exception_cnt(r, p, nr_cpus, NULL);
+ }
+
+ if (mask & SAMPLE_DEVMAP_XMIT_CNT) {
+ str = (sample_log_level & LL_DEFAULT) && out->xmit_cnt.pps ?
+ "devmap_xmit total" :
+ "devmap_xmit";
+
+ print_err(out->xmit_cnt.err || out->xmit_cnt.drop,
+ " %-20s " FMT_COLUMNl FMT_COLUMNl FMT_COLUMNl
+ __COLUMN(".2f") "\n",
+ str, XMIT(out->xmit_cnt.pps),
+ DROP(out->xmit_cnt.drop), out->xmit_cnt.err,
+ "drv_err/s", out->xmit_cnt.bavg, "bulk-avg");
+
+ stats_get_devmap_xmit(r, p, nr_cpus, NULL);
+ }
+
+ if (mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI)
+ stats_get_devmap_xmit_multi(r, p, nr_cpus, NULL,
+ mask & SAMPLE_DEVMAP_XMIT_CNT);
+
+ if (sample_log_level & LL_DEFAULT ||
+ ((sample_log_level & LL_SIMPLE) && sample_err_exp)) {
+ sample_err_exp = false;
+ printf("\n");
+ }
+}
+
+int sample_setup_maps(struct bpf_map **maps)
+{
+ sample_n_cpus = libbpf_num_possible_cpus();
+
+ for (int i = 0; i < MAP_DEVMAP_XMIT_MULTI; i++) {
+ sample_map[i] = maps[i];
+
+ switch (i) {
+ case MAP_RX:
+ case MAP_CPUMAP_KTHREAD:
+ case MAP_DEVMAP_XMIT:
+ sample_map_count[i] = sample_n_cpus;
+ break;
+ case MAP_REDIRECT_ERR:
+ sample_map_count[i] =
+ XDP_REDIRECT_ERR_MAX * sample_n_cpus;
+ break;
+ case MAP_EXCEPTION:
+ sample_map_count[i] = XDP_ACTION_MAX * sample_n_cpus;
+ case MAP_CPUMAP_ENQUEUE:
+ sample_map_count[i] = sample_n_cpus * sample_n_cpus;
+ break;
+ default:
+ return -EINVAL;
+ }
+ if (bpf_map__set_max_entries(sample_map[i], sample_map_count[i]) < 0)
+ return -errno;
+ }
+ sample_map[MAP_DEVMAP_XMIT_MULTI] = maps[MAP_DEVMAP_XMIT_MULTI];
+ return 0;
+}
+
+static int sample_setup_maps_mappings(void)
+{
+ for (int i = 0; i < MAP_DEVMAP_XMIT_MULTI; i++) {
+ size_t size = sample_map_count[i] * sizeof(struct datarec);
+
+ sample_mmap[i] = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ MAP_SHARED, bpf_map__fd(sample_map[i]), 0);
+ if (sample_mmap[i] == MAP_FAILED)
+ return -errno;
+ }
+ return 0;
+}
+
+int __sample_init(int mask)
+{
+ sigset_t st;
+
+ sigemptyset(&st);
+ sigaddset(&st, SIGQUIT);
+ sigaddset(&st, SIGINT);
+ sigaddset(&st, SIGTERM);
+
+ if (sigprocmask(SIG_BLOCK, &st, NULL) < 0)
+ return -errno;
+
+ sample_sig_fd = signalfd(-1, &st, SFD_CLOEXEC | SFD_NONBLOCK);
+ if (sample_sig_fd < 0)
+ return -errno;
+
+ sample_mask = mask;
+
+ return sample_setup_maps_mappings();
+}
+
+static int __sample_remove_xdp(int ifindex, __u32 prog_id, int xdp_flags)
+{
+ __u32 cur_prog_id = 0;
+ int ret;
+
+ if (prog_id) {
+ ret = bpf_xdp_query_id(ifindex, xdp_flags, &cur_prog_id);
+ if (ret < 0)
+ return -errno;
+
+ if (prog_id != cur_prog_id) {
+ print_always(
+ "Program on ifindex %d does not match installed "
+ "program, skipping unload\n",
+ ifindex);
+ return -ENOENT;
+ }
+ }
+
+ return bpf_xdp_detach(ifindex, xdp_flags, NULL);
+}
+
+int sample_install_xdp(struct bpf_program *xdp_prog, int ifindex, bool generic,
+ bool force)
+{
+ int ret, xdp_flags = 0;
+ __u32 prog_id = 0;
+
+ if (sample_xdp_cnt == 32) {
+ fprintf(stderr,
+ "Total limit for installed XDP programs in a sample reached\n");
+ return -ENOTSUP;
+ }
+
+ xdp_flags |= !force ? XDP_FLAGS_UPDATE_IF_NOEXIST : 0;
+ xdp_flags |= generic ? XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE;
+ ret = bpf_xdp_attach(ifindex, bpf_program__fd(xdp_prog), xdp_flags, NULL);
+ if (ret < 0) {
+ ret = -errno;
+ fprintf(stderr,
+ "Failed to install program \"%s\" on ifindex %d, mode = %s, "
+ "force = %s: %s\n",
+ bpf_program__name(xdp_prog), ifindex,
+ generic ? "skb" : "native", force ? "true" : "false",
+ strerror(-ret));
+ return ret;
+ }
+
+ ret = bpf_xdp_query_id(ifindex, xdp_flags, &prog_id);
+ if (ret < 0) {
+ ret = -errno;
+ fprintf(stderr,
+ "Failed to get XDP program id for ifindex %d, removing program: %s\n",
+ ifindex, strerror(errno));
+ __sample_remove_xdp(ifindex, 0, xdp_flags);
+ return ret;
+ }
+ sample_xdp_progs[sample_xdp_cnt++] =
+ (struct xdp_desc){ ifindex, prog_id, xdp_flags };
+
+ return 0;
+}
+
+static void sample_summary_print(void)
+{
+ double num = sample_out.rx_cnt.num;
+
+ if (sample_out.totals.rx) {
+ double pkts = sample_out.totals.rx;
+
+ print_always(" Packets received : %'-10llu\n",
+ sample_out.totals.rx);
+ print_always(" Average packets/s : %'-10.0f\n",
+ sample_round(pkts / num));
+ }
+ if (sample_out.totals.redir) {
+ double pkts = sample_out.totals.redir;
+
+ print_always(" Packets redirected : %'-10llu\n",
+ sample_out.totals.redir);
+ print_always(" Average redir/s : %'-10.0f\n",
+ sample_round(pkts / num));
+ }
+ if (sample_out.totals.drop)
+ print_always(" Rx dropped : %'-10llu\n",
+ sample_out.totals.drop);
+ if (sample_out.totals.drop_xmit)
+ print_always(" Tx dropped : %'-10llu\n",
+ sample_out.totals.drop_xmit);
+ if (sample_out.totals.err)
+ print_always(" Errors recorded : %'-10llu\n",
+ sample_out.totals.err);
+ if (sample_out.totals.xmit) {
+ double pkts = sample_out.totals.xmit;
+
+ print_always(" Packets transmitted : %'-10llu\n",
+ sample_out.totals.xmit);
+ print_always(" Average transmit/s : %'-10.0f\n",
+ sample_round(pkts / num));
+ }
+}
+
+void sample_exit(int status)
+{
+ size_t size;
+
+ for (int i = 0; i < NUM_MAP; i++) {
+ size = sample_map_count[i] * sizeof(**sample_mmap);
+ munmap(sample_mmap[i], size);
+ }
+ while (sample_xdp_cnt--) {
+ int i = sample_xdp_cnt, ifindex, xdp_flags;
+ __u32 prog_id;
+
+ prog_id = sample_xdp_progs[i].prog_id;
+ ifindex = sample_xdp_progs[i].ifindex;
+ xdp_flags = sample_xdp_progs[i].flags;
+
+ __sample_remove_xdp(ifindex, prog_id, xdp_flags);
+ }
+ sample_summary_print();
+ close(sample_sig_fd);
+ exit(status);
+}
+
+static int sample_stats_collect(struct stats_record *rec)
+{
+ int i;
+
+ if (sample_mask & SAMPLE_RX_CNT)
+ map_collect_percpu(sample_mmap[MAP_RX], &rec->rx_cnt);
+
+ if (sample_mask & SAMPLE_REDIRECT_CNT)
+ map_collect_percpu(sample_mmap[MAP_REDIRECT_ERR], &rec->redir_err[0]);
+
+ if (sample_mask & SAMPLE_REDIRECT_ERR_CNT) {
+ for (i = 1; i < XDP_REDIRECT_ERR_MAX; i++)
+ map_collect_percpu(&sample_mmap[MAP_REDIRECT_ERR][i * sample_n_cpus],
+ &rec->redir_err[i]);
+ }
+
+ if (sample_mask & SAMPLE_CPUMAP_ENQUEUE_CNT)
+ for (i = 0; i < sample_n_cpus; i++)
+ map_collect_percpu(&sample_mmap[MAP_CPUMAP_ENQUEUE][i * sample_n_cpus],
+ &rec->enq[i]);
+
+ if (sample_mask & SAMPLE_CPUMAP_KTHREAD_CNT)
+ map_collect_percpu(sample_mmap[MAP_CPUMAP_KTHREAD],
+ &rec->kthread);
+
+ if (sample_mask & SAMPLE_EXCEPTION_CNT)
+ for (i = 0; i < XDP_ACTION_MAX; i++)
+ map_collect_percpu(&sample_mmap[MAP_EXCEPTION][i * sample_n_cpus],
+ &rec->exception[i]);
+
+ if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT)
+ map_collect_percpu(sample_mmap[MAP_DEVMAP_XMIT], &rec->devmap_xmit);
+
+ if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI) {
+ if (map_collect_percpu_devmap(bpf_map__fd(sample_map[MAP_DEVMAP_XMIT_MULTI]), rec) < 0)
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static void sample_summary_update(struct sample_output *out)
+{
+ sample_out.totals.rx += out->totals.rx;
+ sample_out.totals.redir += out->totals.redir;
+ sample_out.totals.drop += out->totals.drop;
+ sample_out.totals.drop_xmit += out->totals.drop_xmit;
+ sample_out.totals.err += out->totals.err;
+ sample_out.totals.xmit += out->totals.xmit;
+ sample_out.rx_cnt.num++;
+}
+
+static void sample_stats_print(int mask, struct stats_record *cur,
+ struct stats_record *prev, char *prog_name)
+{
+ struct sample_output out = {};
+
+ if (mask & SAMPLE_RX_CNT)
+ stats_get_rx_cnt(cur, prev, 0, &out);
+ if (mask & SAMPLE_REDIRECT_CNT)
+ stats_get_redirect_cnt(cur, prev, 0, &out);
+ if (mask & SAMPLE_REDIRECT_ERR_CNT)
+ stats_get_redirect_err_cnt(cur, prev, 0, &out);
+ if (mask & SAMPLE_EXCEPTION_CNT)
+ stats_get_exception_cnt(cur, prev, 0, &out);
+ if (mask & SAMPLE_DEVMAP_XMIT_CNT)
+ stats_get_devmap_xmit(cur, prev, 0, &out);
+ else if (mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI)
+ stats_get_devmap_xmit_multi(cur, prev, 0, &out,
+ mask & SAMPLE_DEVMAP_XMIT_CNT);
+ sample_summary_update(&out);
+
+ stats_print(prog_name, mask, cur, prev, &out);
+}
+
+void sample_switch_mode(void)
+{
+ sample_log_level ^= LL_DEBUG - 1;
+}
+
+static int sample_signal_cb(void)
+{
+ struct signalfd_siginfo si;
+ int r;
+
+ r = read(sample_sig_fd, &si, sizeof(si));
+ if (r < 0)
+ return -errno;
+
+ switch (si.ssi_signo) {
+ case SIGQUIT:
+ sample_switch_mode();
+ printf("\n");
+ break;
+ default:
+ printf("\n");
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Pointer swap trick */
+static void swap(struct stats_record **a, struct stats_record **b)
+{
+ struct stats_record *tmp;
+
+ tmp = *a;
+ *a = *b;
+ *b = tmp;
+}
+
+static int sample_timer_cb(int timerfd, struct stats_record **rec,
+ struct stats_record **prev)
+{
+ char line[64] = "Summary";
+ int ret;
+ __u64 t;
+
+ ret = read(timerfd, &t, sizeof(t));
+ if (ret < 0)
+ return -errno;
+
+ swap(prev, rec);
+ ret = sample_stats_collect(*rec);
+ if (ret < 0)
+ return ret;
+
+ if (sample_xdp_cnt == 2 && !(sample_mask & SAMPLE_SKIP_HEADING)) {
+ char fi[IFNAMSIZ];
+ char to[IFNAMSIZ];
+ const char *f, *t;
+
+ f = t = NULL;
+ if (if_indextoname(sample_xdp_progs[0].ifindex, fi))
+ f = fi;
+ if (if_indextoname(sample_xdp_progs[1].ifindex, to))
+ t = to;
+
+ snprintf(line, sizeof(line), "%s->%s", f ?: "?", t ?: "?");
+ }
+
+ sample_stats_print(sample_mask, *rec, *prev, line);
+ return 0;
+}
+
+int sample_run(int interval, void (*post_cb)(void *), void *ctx)
+{
+ struct timespec ts = { interval, 0 };
+ struct itimerspec its = { ts, ts };
+ struct stats_record *rec, *prev;
+ struct pollfd pfd[2] = {};
+ int timerfd, ret;
+
+ if (!interval) {
+ fprintf(stderr, "Incorrect interval 0\n");
+ return -EINVAL;
+ }
+ sample_interval = interval;
+ /* Pretty print numbers */
+ setlocale(LC_NUMERIC, "en_US.UTF-8");
+
+ timerfd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC | TFD_NONBLOCK);
+ if (timerfd < 0)
+ return -errno;
+ timerfd_settime(timerfd, 0, &its, NULL);
+
+ pfd[0].fd = sample_sig_fd;
+ pfd[0].events = POLLIN;
+
+ pfd[1].fd = timerfd;
+ pfd[1].events = POLLIN;
+
+ ret = -ENOMEM;
+ rec = alloc_stats_record();
+ if (!rec)
+ goto end;
+ prev = alloc_stats_record();
+ if (!prev)
+ goto end_rec;
+
+ ret = sample_stats_collect(rec);
+ if (ret < 0)
+ goto end_rec_prev;
+
+ for (;;) {
+ ret = poll(pfd, 2, -1);
+ if (ret < 0) {
+ if (errno == EINTR)
+ continue;
+ else
+ break;
+ }
+
+ if (pfd[0].revents & POLLIN)
+ ret = sample_signal_cb();
+ else if (pfd[1].revents & POLLIN)
+ ret = sample_timer_cb(timerfd, &rec, &prev);
+
+ if (ret)
+ break;
+
+ if (post_cb)
+ post_cb(ctx);
+ }
+
+end_rec_prev:
+ free_stats_record(prev);
+end_rec:
+ free_stats_record(rec);
+end:
+ close(timerfd);
+
+ return ret;
+}
+
+const char *get_driver_name(int ifindex)
+{
+ struct ethtool_drvinfo drv = {};
+ char ifname[IF_NAMESIZE];
+ static char drvname[32];
+ struct ifreq ifr = {};
+ int fd, r = 0;
+
+ fd = socket(AF_INET, SOCK_DGRAM, 0);
+ if (fd < 0)
+ return "[error]";
+
+ if (!if_indextoname(ifindex, ifname))
+ goto end;
+
+ drv.cmd = ETHTOOL_GDRVINFO;
+ safe_strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
+ ifr.ifr_data = (void *)&drv;
+
+ r = ioctl(fd, SIOCETHTOOL, &ifr);
+ if (r)
+ goto end;
+
+ safe_strncpy(drvname, drv.driver, sizeof(drvname));
+
+ close(fd);
+ return drvname;
+
+end:
+ r = errno;
+ close(fd);
+ return r == EOPNOTSUPP ? "loopback" : "[error]";
+}
+
+int get_mac_addr(int ifindex, void *mac_addr)
+{
+ char ifname[IF_NAMESIZE];
+ struct ifreq ifr = {};
+ int fd, r;
+
+ fd = socket(AF_INET, SOCK_DGRAM, 0);
+ if (fd < 0)
+ return -errno;
+
+ if (!if_indextoname(ifindex, ifname)) {
+ r = -errno;
+ goto end;
+ }
+
+ safe_strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
+
+ r = ioctl(fd, SIOCGIFHWADDR, &ifr);
+ if (r) {
+ r = -errno;
+ goto end;
+ }
+
+ memcpy(mac_addr, ifr.ifr_hwaddr.sa_data, 6 * sizeof(char));
+
+end:
+ close(fd);
+ return r;
+}
+
+__attribute__((constructor)) static void sample_ctor(void)
+{
+ if (libbpf_set_strict_mode(LIBBPF_STRICT_ALL) < 0) {
+ fprintf(stderr, "Failed to set libbpf strict mode: %s\n",
+ strerror(errno));
+ /* Just exit, nothing to cleanup right now */
+ exit(EXIT_FAIL_BPF);
+ }
+}
diff --git a/samples/bpf/xdp_sample_user.h b/samples/bpf/xdp_sample_user.h
new file mode 100644
index 000000000000..f45051679977
--- /dev/null
+++ b/samples/bpf/xdp_sample_user.h
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#ifndef XDP_SAMPLE_USER_H
+#define XDP_SAMPLE_USER_H
+
+#include <bpf/libbpf.h>
+#include <linux/compiler.h>
+
+#include "xdp_sample_shared.h"
+
+enum stats_mask {
+ _SAMPLE_REDIRECT_MAP = 1U << 0,
+ SAMPLE_RX_CNT = 1U << 1,
+ SAMPLE_REDIRECT_ERR_CNT = 1U << 2,
+ SAMPLE_CPUMAP_ENQUEUE_CNT = 1U << 3,
+ SAMPLE_CPUMAP_KTHREAD_CNT = 1U << 4,
+ SAMPLE_EXCEPTION_CNT = 1U << 5,
+ SAMPLE_DEVMAP_XMIT_CNT = 1U << 6,
+ SAMPLE_REDIRECT_CNT = 1U << 7,
+ SAMPLE_REDIRECT_MAP_CNT = SAMPLE_REDIRECT_CNT | _SAMPLE_REDIRECT_MAP,
+ SAMPLE_REDIRECT_ERR_MAP_CNT = SAMPLE_REDIRECT_ERR_CNT | _SAMPLE_REDIRECT_MAP,
+ SAMPLE_DEVMAP_XMIT_CNT_MULTI = 1U << 8,
+ SAMPLE_SKIP_HEADING = 1U << 9,
+};
+
+/* Exit return codes */
+#define EXIT_OK 0
+#define EXIT_FAIL 1
+#define EXIT_FAIL_OPTION 2
+#define EXIT_FAIL_XDP 3
+#define EXIT_FAIL_BPF 4
+#define EXIT_FAIL_MEM 5
+
+int sample_setup_maps(struct bpf_map **maps);
+int __sample_init(int mask);
+void sample_exit(int status);
+int sample_run(int interval, void (*post_cb)(void *), void *ctx);
+
+void sample_switch_mode(void);
+int sample_install_xdp(struct bpf_program *xdp_prog, int ifindex, bool generic,
+ bool force);
+void sample_usage(char *argv[], const struct option *long_options,
+ const char *doc, int mask, bool error);
+
+const char *get_driver_name(int ifindex);
+int get_mac_addr(int ifindex, void *mac_addr);
+
+#pragma GCC diagnostic push
+#ifndef __clang__
+#pragma GCC diagnostic ignored "-Wstringop-truncation"
+#endif
+__attribute__((unused))
+static inline char *safe_strncpy(char *dst, const char *src, size_t size)
+{
+ if (!size)
+ return dst;
+ strncpy(dst, src, size - 1);
+ dst[size - 1] = '\0';
+ return dst;
+}
+#pragma GCC diagnostic pop
+
+#define __attach_tp(name) \
+ ({ \
+ if (bpf_program__type(skel->progs.name) != BPF_PROG_TYPE_TRACING)\
+ return -EINVAL; \
+ skel->links.name = bpf_program__attach(skel->progs.name); \
+ if (!skel->links.name) \
+ return -errno; \
+ })
+
+#define sample_init_pre_load(skel) \
+ ({ \
+ skel->rodata->nr_cpus = libbpf_num_possible_cpus(); \
+ sample_setup_maps((struct bpf_map *[]){ \
+ skel->maps.rx_cnt, skel->maps.redir_err_cnt, \
+ skel->maps.cpumap_enqueue_cnt, \
+ skel->maps.cpumap_kthread_cnt, \
+ skel->maps.exception_cnt, skel->maps.devmap_xmit_cnt, \
+ skel->maps.devmap_xmit_cnt_multi }); \
+ })
+
+#define DEFINE_SAMPLE_INIT(name) \
+ static int sample_init(struct name *skel, int mask) \
+ { \
+ int ret; \
+ ret = __sample_init(mask); \
+ if (ret < 0) \
+ return ret; \
+ if (mask & SAMPLE_REDIRECT_MAP_CNT) \
+ __attach_tp(tp_xdp_redirect_map); \
+ if (mask & SAMPLE_REDIRECT_CNT) \
+ __attach_tp(tp_xdp_redirect); \
+ if (mask & SAMPLE_REDIRECT_ERR_MAP_CNT) \
+ __attach_tp(tp_xdp_redirect_map_err); \
+ if (mask & SAMPLE_REDIRECT_ERR_CNT) \
+ __attach_tp(tp_xdp_redirect_err); \
+ if (mask & SAMPLE_CPUMAP_ENQUEUE_CNT) \
+ __attach_tp(tp_xdp_cpumap_enqueue); \
+ if (mask & SAMPLE_CPUMAP_KTHREAD_CNT) \
+ __attach_tp(tp_xdp_cpumap_kthread); \
+ if (mask & SAMPLE_EXCEPTION_CNT) \
+ __attach_tp(tp_xdp_exception); \
+ if (mask & SAMPLE_DEVMAP_XMIT_CNT) \
+ __attach_tp(tp_xdp_devmap_xmit); \
+ if (mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI) \
+ __attach_tp(tp_xdp_devmap_xmit_multi); \
+ return 0; \
+ }
+
+#endif
diff --git a/samples/bpf/xdp_tx_iptunnel_kern.c b/samples/bpf/xdp_tx_iptunnel_kern.c
index 575d57e4b8d6..0e2bca3a3fff 100644
--- a/samples/bpf/xdp_tx_iptunnel_kern.c
+++ b/samples/bpf/xdp_tx_iptunnel_kern.c
@@ -212,7 +212,7 @@ static __always_inline int handle_ipv6(struct xdp_md *xdp)
return XDP_TX;
}
-SEC("xdp_tx_iptunnel")
+SEC("xdp.frags")
int _xdp_tx_iptunnel(struct xdp_md *xdp)
{
void *data_end = (void *)(long)xdp->data_end;
diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c
index a419bee151a8..307baef6861a 100644
--- a/samples/bpf/xdp_tx_iptunnel_user.c
+++ b/samples/bpf/xdp_tx_iptunnel_user.c
@@ -10,7 +10,6 @@
#include <stdlib.h>
#include <string.h>
#include <net/if.h>
-#include <sys/resource.h>
#include <arpa/inet.h>
#include <netinet/ether.h>
#include <unistd.h>
@@ -32,12 +31,12 @@ static void int_exit(int sig)
__u32 curr_prog_id = 0;
if (ifindex > -1) {
- if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
- printf("bpf_get_link_xdp_id failed\n");
+ if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) {
+ printf("bpf_xdp_query_id failed\n");
exit(1);
}
if (prog_id == curr_prog_id)
- bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+ bpf_xdp_detach(ifindex, xdp_flags, NULL);
else if (!curr_prog_id)
printf("couldn't find a prog id on a given iface\n");
else
@@ -152,10 +151,6 @@ static int parse_ports(const char *port_str, int *min_port, int *max_port)
int main(int argc, char **argv)
{
- struct bpf_prog_load_attr prog_load_attr = {
- .prog_type = BPF_PROG_TYPE_XDP,
- };
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
int min_port = 0, max_port = 0, vip2tnl_map_fd;
const char *optstr = "i:a:p:s:d:m:T:P:FSNh";
unsigned char opt_flags[256] = {};
@@ -163,6 +158,7 @@ int main(int argc, char **argv)
__u32 info_len = sizeof(info);
unsigned int kill_after_s = 0;
struct iptnl_info tnl = {};
+ struct bpf_program *prog;
struct bpf_object *obj;
struct vip vip = {};
char filename[256];
@@ -254,26 +250,26 @@ int main(int argc, char **argv)
}
}
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)");
- return 1;
- }
-
if (!ifindex) {
fprintf(stderr, "Invalid ifname\n");
return 1;
}
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- prog_load_attr.file = filename;
- if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
+ obj = bpf_object__open_file(filename, NULL);
+ if (libbpf_get_error(obj))
return 1;
- if (!prog_fd) {
- printf("bpf_prog_load_xattr: %s\n", strerror(errno));
+ prog = bpf_object__next_program(obj, NULL);
+ bpf_program__set_type(prog, BPF_PROG_TYPE_XDP);
+
+ err = bpf_object__load(obj);
+ if (err) {
+ printf("bpf_object__load(): %s\n", strerror(errno));
return 1;
}
+ prog_fd = bpf_program__fd(prog);
rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt");
vip2tnl_map_fd = bpf_object__find_map_fd_by_name(obj, "vip2tnl");
@@ -294,7 +290,7 @@ int main(int argc, char **argv)
}
}
- if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
+ if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) {
printf("link set xdp fd failed\n");
return 1;
}
@@ -308,7 +304,7 @@ int main(int argc, char **argv)
poll_stats(kill_after_s);
- bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+ bpf_xdp_detach(ifindex, xdp_flags, NULL);
return 0;
}
diff --git a/samples/bpf/xdpsock.h b/samples/bpf/xdpsock.h
deleted file mode 100644
index b7eca15c78cc..000000000000
--- a/samples/bpf/xdpsock.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0
- *
- * Copyright(c) 2019 Intel Corporation.
- */
-
-#ifndef XDPSOCK_H_
-#define XDPSOCK_H_
-
-#define MAX_SOCKS 4
-
-#endif /* XDPSOCK_H */
diff --git a/samples/bpf/xdpsock_kern.c b/samples/bpf/xdpsock_kern.c
deleted file mode 100644
index 05430484375c..000000000000
--- a/samples/bpf/xdpsock_kern.c
+++ /dev/null
@@ -1,24 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-#include "xdpsock.h"
-
-/* This XDP program is only needed for the XDP_SHARED_UMEM mode.
- * If you do not use this mode, libbpf can supply an XDP program for you.
- */
-
-struct {
- __uint(type, BPF_MAP_TYPE_XSKMAP);
- __uint(max_entries, MAX_SOCKS);
- __uint(key_size, sizeof(int));
- __uint(value_size, sizeof(int));
-} xsks_map SEC(".maps");
-
-static unsigned int rr;
-
-SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
-{
- rr = (rr + 1) & (MAX_SOCKS - 1);
-
- return bpf_redirect_map(&xsks_map, rr, XDP_DROP);
-}
diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
deleted file mode 100644
index c91e91362a0c..000000000000
--- a/samples/bpf/xdpsock_user.c
+++ /dev/null
@@ -1,1212 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright(c) 2017 - 2018 Intel Corporation. */
-
-#include <asm/barrier.h>
-#include <errno.h>
-#include <getopt.h>
-#include <libgen.h>
-#include <linux/bpf.h>
-#include <linux/compiler.h>
-#include <linux/if_link.h>
-#include <linux/if_xdp.h>
-#include <linux/if_ether.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-#include <arpa/inet.h>
-#include <locale.h>
-#include <net/ethernet.h>
-#include <net/if.h>
-#include <poll.h>
-#include <pthread.h>
-#include <signal.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/mman.h>
-#include <sys/resource.h>
-#include <sys/socket.h>
-#include <sys/types.h>
-#include <time.h>
-#include <unistd.h>
-
-#include <bpf/libbpf.h>
-#include <bpf/xsk.h>
-#include <bpf/bpf.h>
-#include "xdpsock.h"
-
-#ifndef SOL_XDP
-#define SOL_XDP 283
-#endif
-
-#ifndef AF_XDP
-#define AF_XDP 44
-#endif
-
-#ifndef PF_XDP
-#define PF_XDP AF_XDP
-#endif
-
-#define NUM_FRAMES (4 * 1024)
-#define MIN_PKT_SIZE 64
-
-#define DEBUG_HEXDUMP 0
-
-typedef __u64 u64;
-typedef __u32 u32;
-typedef __u16 u16;
-typedef __u8 u8;
-
-static unsigned long prev_time;
-
-enum benchmark_type {
- BENCH_RXDROP = 0,
- BENCH_TXONLY = 1,
- BENCH_L2FWD = 2,
-};
-
-static enum benchmark_type opt_bench = BENCH_RXDROP;
-static u32 opt_xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
-static const char *opt_if = "";
-static int opt_ifindex;
-static int opt_queue;
-static unsigned long opt_duration;
-static unsigned long start_time;
-static bool benchmark_done;
-static u32 opt_batch_size = 64;
-static int opt_pkt_count;
-static u16 opt_pkt_size = MIN_PKT_SIZE;
-static u32 opt_pkt_fill_pattern = 0x12345678;
-static int opt_poll;
-static int opt_interval = 1;
-static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP;
-static u32 opt_umem_flags;
-static int opt_unaligned_chunks;
-static int opt_mmap_flags;
-static int opt_xsk_frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
-static int opt_timeout = 1000;
-static bool opt_need_wakeup = true;
-static u32 opt_num_xsks = 1;
-static u32 prog_id;
-
-struct xsk_umem_info {
- struct xsk_ring_prod fq;
- struct xsk_ring_cons cq;
- struct xsk_umem *umem;
- void *buffer;
-};
-
-struct xsk_socket_info {
- struct xsk_ring_cons rx;
- struct xsk_ring_prod tx;
- struct xsk_umem_info *umem;
- struct xsk_socket *xsk;
- unsigned long rx_npkts;
- unsigned long tx_npkts;
- unsigned long prev_rx_npkts;
- unsigned long prev_tx_npkts;
- u32 outstanding_tx;
-};
-
-static int num_socks;
-struct xsk_socket_info *xsks[MAX_SOCKS];
-
-static unsigned long get_nsecs(void)
-{
- struct timespec ts;
-
- clock_gettime(CLOCK_MONOTONIC, &ts);
- return ts.tv_sec * 1000000000UL + ts.tv_nsec;
-}
-
-static void print_benchmark(bool running)
-{
- const char *bench_str = "INVALID";
-
- if (opt_bench == BENCH_RXDROP)
- bench_str = "rxdrop";
- else if (opt_bench == BENCH_TXONLY)
- bench_str = "txonly";
- else if (opt_bench == BENCH_L2FWD)
- bench_str = "l2fwd";
-
- printf("%s:%d %s ", opt_if, opt_queue, bench_str);
- if (opt_xdp_flags & XDP_FLAGS_SKB_MODE)
- printf("xdp-skb ");
- else if (opt_xdp_flags & XDP_FLAGS_DRV_MODE)
- printf("xdp-drv ");
- else
- printf(" ");
-
- if (opt_poll)
- printf("poll() ");
-
- if (running) {
- printf("running...");
- fflush(stdout);
- }
-}
-
-static void dump_stats(void)
-{
- unsigned long now = get_nsecs();
- long dt = now - prev_time;
- int i;
-
- prev_time = now;
-
- for (i = 0; i < num_socks && xsks[i]; i++) {
- char *fmt = "%-15s %'-11.0f %'-11lu\n";
- double rx_pps, tx_pps;
-
- rx_pps = (xsks[i]->rx_npkts - xsks[i]->prev_rx_npkts) *
- 1000000000. / dt;
- tx_pps = (xsks[i]->tx_npkts - xsks[i]->prev_tx_npkts) *
- 1000000000. / dt;
-
- printf("\n sock%d@", i);
- print_benchmark(false);
- printf("\n");
-
- printf("%-15s %-11s %-11s %-11.2f\n", "", "pps", "pkts",
- dt / 1000000000.);
- printf(fmt, "rx", rx_pps, xsks[i]->rx_npkts);
- printf(fmt, "tx", tx_pps, xsks[i]->tx_npkts);
-
- xsks[i]->prev_rx_npkts = xsks[i]->rx_npkts;
- xsks[i]->prev_tx_npkts = xsks[i]->tx_npkts;
- }
-}
-
-static bool is_benchmark_done(void)
-{
- if (opt_duration > 0) {
- unsigned long dt = (get_nsecs() - start_time);
-
- if (dt >= opt_duration)
- benchmark_done = true;
- }
- return benchmark_done;
-}
-
-static void *poller(void *arg)
-{
- (void)arg;
- while (!is_benchmark_done()) {
- sleep(opt_interval);
- dump_stats();
- }
-
- return NULL;
-}
-
-static void remove_xdp_program(void)
-{
- u32 curr_prog_id = 0;
-
- if (bpf_get_link_xdp_id(opt_ifindex, &curr_prog_id, opt_xdp_flags)) {
- printf("bpf_get_link_xdp_id failed\n");
- exit(EXIT_FAILURE);
- }
- if (prog_id == curr_prog_id)
- bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags);
- else if (!curr_prog_id)
- printf("couldn't find a prog id on a given interface\n");
- else
- printf("program on interface changed, not removing\n");
-}
-
-static void int_exit(int sig)
-{
- benchmark_done = true;
-}
-
-static void xdpsock_cleanup(void)
-{
- struct xsk_umem *umem = xsks[0]->umem->umem;
- int i;
-
- dump_stats();
- for (i = 0; i < num_socks; i++)
- xsk_socket__delete(xsks[i]->xsk);
- (void)xsk_umem__delete(umem);
- remove_xdp_program();
-}
-
-static void __exit_with_error(int error, const char *file, const char *func,
- int line)
-{
- fprintf(stderr, "%s:%s:%i: errno: %d/\"%s\"\n", file, func,
- line, error, strerror(error));
- dump_stats();
- remove_xdp_program();
- exit(EXIT_FAILURE);
-}
-
-#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, \
- __LINE__)
-static void swap_mac_addresses(void *data)
-{
- struct ether_header *eth = (struct ether_header *)data;
- struct ether_addr *src_addr = (struct ether_addr *)&eth->ether_shost;
- struct ether_addr *dst_addr = (struct ether_addr *)&eth->ether_dhost;
- struct ether_addr tmp;
-
- tmp = *src_addr;
- *src_addr = *dst_addr;
- *dst_addr = tmp;
-}
-
-static void hex_dump(void *pkt, size_t length, u64 addr)
-{
- const unsigned char *address = (unsigned char *)pkt;
- const unsigned char *line = address;
- size_t line_size = 32;
- unsigned char c;
- char buf[32];
- int i = 0;
-
- if (!DEBUG_HEXDUMP)
- return;
-
- sprintf(buf, "addr=%llu", addr);
- printf("length = %zu\n", length);
- printf("%s | ", buf);
- while (length-- > 0) {
- printf("%02X ", *address++);
- if (!(++i % line_size) || (length == 0 && i % line_size)) {
- if (length == 0) {
- while (i++ % line_size)
- printf("__ ");
- }
- printf(" | "); /* right close */
- while (line < address) {
- c = *line++;
- printf("%c", (c < 33 || c == 255) ? 0x2E : c);
- }
- printf("\n");
- if (length > 0)
- printf("%s | ", buf);
- }
- }
- printf("\n");
-}
-
-static void *memset32_htonl(void *dest, u32 val, u32 size)
-{
- u32 *ptr = (u32 *)dest;
- int i;
-
- val = htonl(val);
-
- for (i = 0; i < (size & (~0x3)); i += 4)
- ptr[i >> 2] = val;
-
- for (; i < size; i++)
- ((char *)dest)[i] = ((char *)&val)[i & 3];
-
- return dest;
-}
-
-/*
- * This function code has been taken from
- * Linux kernel lib/checksum.c
- */
-static inline unsigned short from32to16(unsigned int x)
-{
- /* add up 16-bit and 16-bit for 16+c bit */
- x = (x & 0xffff) + (x >> 16);
- /* add up carry.. */
- x = (x & 0xffff) + (x >> 16);
- return x;
-}
-
-/*
- * This function code has been taken from
- * Linux kernel lib/checksum.c
- */
-static unsigned int do_csum(const unsigned char *buff, int len)
-{
- unsigned int result = 0;
- int odd;
-
- if (len <= 0)
- goto out;
- odd = 1 & (unsigned long)buff;
- if (odd) {
-#ifdef __LITTLE_ENDIAN
- result += (*buff << 8);
-#else
- result = *buff;
-#endif
- len--;
- buff++;
- }
- if (len >= 2) {
- if (2 & (unsigned long)buff) {
- result += *(unsigned short *)buff;
- len -= 2;
- buff += 2;
- }
- if (len >= 4) {
- const unsigned char *end = buff +
- ((unsigned int)len & ~3);
- unsigned int carry = 0;
-
- do {
- unsigned int w = *(unsigned int *)buff;
-
- buff += 4;
- result += carry;
- result += w;
- carry = (w > result);
- } while (buff < end);
- result += carry;
- result = (result & 0xffff) + (result >> 16);
- }
- if (len & 2) {
- result += *(unsigned short *)buff;
- buff += 2;
- }
- }
- if (len & 1)
-#ifdef __LITTLE_ENDIAN
- result += *buff;
-#else
- result += (*buff << 8);
-#endif
- result = from32to16(result);
- if (odd)
- result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
-out:
- return result;
-}
-
-__sum16 ip_fast_csum(const void *iph, unsigned int ihl);
-
-/*
- * This is a version of ip_compute_csum() optimized for IP headers,
- * which always checksum on 4 octet boundaries.
- * This function code has been taken from
- * Linux kernel lib/checksum.c
- */
-__sum16 ip_fast_csum(const void *iph, unsigned int ihl)
-{
- return (__force __sum16)~do_csum(iph, ihl * 4);
-}
-
-/*
- * Fold a partial checksum
- * This function code has been taken from
- * Linux kernel include/asm-generic/checksum.h
- */
-static inline __sum16 csum_fold(__wsum csum)
-{
- u32 sum = (__force u32)csum;
-
- sum = (sum & 0xffff) + (sum >> 16);
- sum = (sum & 0xffff) + (sum >> 16);
- return (__force __sum16)~sum;
-}
-
-/*
- * This function code has been taken from
- * Linux kernel lib/checksum.c
- */
-static inline u32 from64to32(u64 x)
-{
- /* add up 32-bit and 32-bit for 32+c bit */
- x = (x & 0xffffffff) + (x >> 32);
- /* add up carry.. */
- x = (x & 0xffffffff) + (x >> 32);
- return (u32)x;
-}
-
-__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
- __u32 len, __u8 proto, __wsum sum);
-
-/*
- * This function code has been taken from
- * Linux kernel lib/checksum.c
- */
-__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
- __u32 len, __u8 proto, __wsum sum)
-{
- unsigned long long s = (__force u32)sum;
-
- s += (__force u32)saddr;
- s += (__force u32)daddr;
-#ifdef __BIG_ENDIAN__
- s += proto + len;
-#else
- s += (proto + len) << 8;
-#endif
- return (__force __wsum)from64to32(s);
-}
-
-/*
- * This function has been taken from
- * Linux kernel include/asm-generic/checksum.h
- */
-static inline __sum16
-csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len,
- __u8 proto, __wsum sum)
-{
- return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
-}
-
-static inline u16 udp_csum(u32 saddr, u32 daddr, u32 len,
- u8 proto, u16 *udp_pkt)
-{
- u32 csum = 0;
- u32 cnt = 0;
-
- /* udp hdr and data */
- for (; cnt < len; cnt += 2)
- csum += udp_pkt[cnt >> 1];
-
- return csum_tcpudp_magic(saddr, daddr, len, proto, csum);
-}
-
-#define ETH_FCS_SIZE 4
-
-#define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \
- sizeof(struct udphdr))
-
-#define PKT_SIZE (opt_pkt_size - ETH_FCS_SIZE)
-#define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr))
-#define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr))
-#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr))
-
-static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE];
-
-static void gen_eth_hdr_data(void)
-{
- struct udphdr *udp_hdr = (struct udphdr *)(pkt_data +
- sizeof(struct ethhdr) +
- sizeof(struct iphdr));
- struct iphdr *ip_hdr = (struct iphdr *)(pkt_data +
- sizeof(struct ethhdr));
- struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data;
-
- /* ethernet header */
- memcpy(eth_hdr->h_dest, "\x3c\xfd\xfe\x9e\x7f\x71", ETH_ALEN);
- memcpy(eth_hdr->h_source, "\xec\xb1\xd7\x98\x3a\xc0", ETH_ALEN);
- eth_hdr->h_proto = htons(ETH_P_IP);
-
- /* IP header */
- ip_hdr->version = IPVERSION;
- ip_hdr->ihl = 0x5; /* 20 byte header */
- ip_hdr->tos = 0x0;
- ip_hdr->tot_len = htons(IP_PKT_SIZE);
- ip_hdr->id = 0;
- ip_hdr->frag_off = 0;
- ip_hdr->ttl = IPDEFTTL;
- ip_hdr->protocol = IPPROTO_UDP;
- ip_hdr->saddr = htonl(0x0a0a0a10);
- ip_hdr->daddr = htonl(0x0a0a0a20);
-
- /* IP header checksum */
- ip_hdr->check = 0;
- ip_hdr->check = ip_fast_csum((const void *)ip_hdr, ip_hdr->ihl);
-
- /* UDP header */
- udp_hdr->source = htons(0x1000);
- udp_hdr->dest = htons(0x1000);
- udp_hdr->len = htons(UDP_PKT_SIZE);
-
- /* UDP data */
- memset32_htonl(pkt_data + PKT_HDR_SIZE, opt_pkt_fill_pattern,
- UDP_PKT_DATA_SIZE);
-
- /* UDP header checksum */
- udp_hdr->check = 0;
- udp_hdr->check = udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE,
- IPPROTO_UDP, (u16 *)udp_hdr);
-}
-
-static void gen_eth_frame(struct xsk_umem_info *umem, u64 addr)
-{
- memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data,
- PKT_SIZE);
-}
-
-static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size)
-{
- struct xsk_umem_info *umem;
- struct xsk_umem_config cfg = {
- .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
- .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
- .frame_size = opt_xsk_frame_size,
- .frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM,
- .flags = opt_umem_flags
- };
- int ret;
-
- umem = calloc(1, sizeof(*umem));
- if (!umem)
- exit_with_error(errno);
-
- ret = xsk_umem__create(&umem->umem, buffer, size, &umem->fq, &umem->cq,
- &cfg);
- if (ret)
- exit_with_error(-ret);
-
- umem->buffer = buffer;
- return umem;
-}
-
-static void xsk_populate_fill_ring(struct xsk_umem_info *umem)
-{
- int ret, i;
- u32 idx;
-
- ret = xsk_ring_prod__reserve(&umem->fq,
- XSK_RING_PROD__DEFAULT_NUM_DESCS, &idx);
- if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS)
- exit_with_error(-ret);
- for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++)
- *xsk_ring_prod__fill_addr(&umem->fq, idx++) =
- i * opt_xsk_frame_size;
- xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS);
-}
-
-static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem,
- bool rx, bool tx)
-{
- struct xsk_socket_config cfg;
- struct xsk_socket_info *xsk;
- struct xsk_ring_cons *rxr;
- struct xsk_ring_prod *txr;
- int ret;
-
- xsk = calloc(1, sizeof(*xsk));
- if (!xsk)
- exit_with_error(errno);
-
- xsk->umem = umem;
- cfg.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
- cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
- if (opt_num_xsks > 1)
- cfg.libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD;
- else
- cfg.libbpf_flags = 0;
- cfg.xdp_flags = opt_xdp_flags;
- cfg.bind_flags = opt_xdp_bind_flags;
-
- rxr = rx ? &xsk->rx : NULL;
- txr = tx ? &xsk->tx : NULL;
- ret = xsk_socket__create(&xsk->xsk, opt_if, opt_queue, umem->umem,
- rxr, txr, &cfg);
- if (ret)
- exit_with_error(-ret);
-
- ret = bpf_get_link_xdp_id(opt_ifindex, &prog_id, opt_xdp_flags);
- if (ret)
- exit_with_error(-ret);
-
- return xsk;
-}
-
-static struct option long_options[] = {
- {"rxdrop", no_argument, 0, 'r'},
- {"txonly", no_argument, 0, 't'},
- {"l2fwd", no_argument, 0, 'l'},
- {"interface", required_argument, 0, 'i'},
- {"queue", required_argument, 0, 'q'},
- {"poll", no_argument, 0, 'p'},
- {"xdp-skb", no_argument, 0, 'S'},
- {"xdp-native", no_argument, 0, 'N'},
- {"interval", required_argument, 0, 'n'},
- {"zero-copy", no_argument, 0, 'z'},
- {"copy", no_argument, 0, 'c'},
- {"frame-size", required_argument, 0, 'f'},
- {"no-need-wakeup", no_argument, 0, 'm'},
- {"unaligned", no_argument, 0, 'u'},
- {"shared-umem", no_argument, 0, 'M'},
- {"force", no_argument, 0, 'F'},
- {"duration", required_argument, 0, 'd'},
- {"batch-size", required_argument, 0, 'b'},
- {"tx-pkt-count", required_argument, 0, 'C'},
- {"tx-pkt-size", required_argument, 0, 's'},
- {"tx-pkt-pattern", required_argument, 0, 'P'},
- {0, 0, 0, 0}
-};
-
-static void usage(const char *prog)
-{
- const char *str =
- " Usage: %s [OPTIONS]\n"
- " Options:\n"
- " -r, --rxdrop Discard all incoming packets (default)\n"
- " -t, --txonly Only send packets\n"
- " -l, --l2fwd MAC swap L2 forwarding\n"
- " -i, --interface=n Run on interface n\n"
- " -q, --queue=n Use queue n (default 0)\n"
- " -p, --poll Use poll syscall\n"
- " -S, --xdp-skb=n Use XDP skb-mod\n"
- " -N, --xdp-native=n Enforce XDP native mode\n"
- " -n, --interval=n Specify statistics update interval (default 1 sec).\n"
- " -z, --zero-copy Force zero-copy mode.\n"
- " -c, --copy Force copy mode.\n"
- " -m, --no-need-wakeup Turn off use of driver need wakeup flag.\n"
- " -f, --frame-size=n Set the frame size (must be a power of two in aligned mode, default is %d).\n"
- " -u, --unaligned Enable unaligned chunk placement\n"
- " -M, --shared-umem Enable XDP_SHARED_UMEM\n"
- " -F, --force Force loading the XDP prog\n"
- " -d, --duration=n Duration in secs to run command.\n"
- " Default: forever.\n"
- " -b, --batch-size=n Batch size for sending or receiving\n"
- " packets. Default: %d\n"
- " -C, --tx-pkt-count=n Number of packets to send.\n"
- " Default: Continuous packets.\n"
- " -s, --tx-pkt-size=n Transmit packet size.\n"
- " (Default: %d bytes)\n"
- " Min size: %d, Max size %d.\n"
- " -P, --tx-pkt-pattern=nPacket fill pattern. Default: 0x%x\n"
- "\n";
- fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE,
- opt_batch_size, MIN_PKT_SIZE, MIN_PKT_SIZE,
- XSK_UMEM__DEFAULT_FRAME_SIZE, opt_pkt_fill_pattern);
-
- exit(EXIT_FAILURE);
-}
-
-static void parse_command_line(int argc, char **argv)
-{
- int option_index, c;
-
- opterr = 0;
-
- for (;;) {
- c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:",
- long_options, &option_index);
- if (c == -1)
- break;
-
- switch (c) {
- case 'r':
- opt_bench = BENCH_RXDROP;
- break;
- case 't':
- opt_bench = BENCH_TXONLY;
- break;
- case 'l':
- opt_bench = BENCH_L2FWD;
- break;
- case 'i':
- opt_if = optarg;
- break;
- case 'q':
- opt_queue = atoi(optarg);
- break;
- case 'p':
- opt_poll = 1;
- break;
- case 'S':
- opt_xdp_flags |= XDP_FLAGS_SKB_MODE;
- opt_xdp_bind_flags |= XDP_COPY;
- break;
- case 'N':
- /* default, set below */
- break;
- case 'n':
- opt_interval = atoi(optarg);
- break;
- case 'z':
- opt_xdp_bind_flags |= XDP_ZEROCOPY;
- break;
- case 'c':
- opt_xdp_bind_flags |= XDP_COPY;
- break;
- case 'u':
- opt_umem_flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG;
- opt_unaligned_chunks = 1;
- opt_mmap_flags = MAP_HUGETLB;
- break;
- case 'F':
- opt_xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
- break;
- case 'f':
- opt_xsk_frame_size = atoi(optarg);
- break;
- case 'm':
- opt_need_wakeup = false;
- opt_xdp_bind_flags &= ~XDP_USE_NEED_WAKEUP;
- break;
- case 'M':
- opt_num_xsks = MAX_SOCKS;
- break;
- case 'd':
- opt_duration = atoi(optarg);
- opt_duration *= 1000000000;
- break;
- case 'b':
- opt_batch_size = atoi(optarg);
- break;
- case 'C':
- opt_pkt_count = atoi(optarg);
- break;
- case 's':
- opt_pkt_size = atoi(optarg);
- if (opt_pkt_size > (XSK_UMEM__DEFAULT_FRAME_SIZE) ||
- opt_pkt_size < MIN_PKT_SIZE) {
- fprintf(stderr,
- "ERROR: Invalid frame size %d\n",
- opt_pkt_size);
- usage(basename(argv[0]));
- }
- break;
- case 'P':
- opt_pkt_fill_pattern = strtol(optarg, NULL, 16);
- break;
- default:
- usage(basename(argv[0]));
- }
- }
-
- if (!(opt_xdp_flags & XDP_FLAGS_SKB_MODE))
- opt_xdp_flags |= XDP_FLAGS_DRV_MODE;
-
- opt_ifindex = if_nametoindex(opt_if);
- if (!opt_ifindex) {
- fprintf(stderr, "ERROR: interface \"%s\" does not exist\n",
- opt_if);
- usage(basename(argv[0]));
- }
-
- if ((opt_xsk_frame_size & (opt_xsk_frame_size - 1)) &&
- !opt_unaligned_chunks) {
- fprintf(stderr, "--frame-size=%d is not a power of two\n",
- opt_xsk_frame_size);
- usage(basename(argv[0]));
- }
-}
-
-static void kick_tx(struct xsk_socket_info *xsk)
-{
- int ret;
-
- ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
- if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN ||
- errno == EBUSY || errno == ENETDOWN)
- return;
- exit_with_error(errno);
-}
-
-static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk,
- struct pollfd *fds)
-{
- struct xsk_umem_info *umem = xsk->umem;
- u32 idx_cq = 0, idx_fq = 0;
- unsigned int rcvd;
- size_t ndescs;
-
- if (!xsk->outstanding_tx)
- return;
-
- if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx))
- kick_tx(xsk);
-
- ndescs = (xsk->outstanding_tx > opt_batch_size) ? opt_batch_size :
- xsk->outstanding_tx;
-
- /* re-add completed Tx buffers */
- rcvd = xsk_ring_cons__peek(&umem->cq, ndescs, &idx_cq);
- if (rcvd > 0) {
- unsigned int i;
- int ret;
-
- ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
- while (ret != rcvd) {
- if (ret < 0)
- exit_with_error(-ret);
- if (xsk_ring_prod__needs_wakeup(&umem->fq))
- ret = poll(fds, num_socks, opt_timeout);
- ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
- }
-
- for (i = 0; i < rcvd; i++)
- *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) =
- *xsk_ring_cons__comp_addr(&umem->cq, idx_cq++);
-
- xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
- xsk_ring_cons__release(&xsk->umem->cq, rcvd);
- xsk->outstanding_tx -= rcvd;
- xsk->tx_npkts += rcvd;
- }
-}
-
-static inline void complete_tx_only(struct xsk_socket_info *xsk,
- int batch_size)
-{
- unsigned int rcvd;
- u32 idx;
-
- if (!xsk->outstanding_tx)
- return;
-
- if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx))
- kick_tx(xsk);
-
- rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx);
- if (rcvd > 0) {
- xsk_ring_cons__release(&xsk->umem->cq, rcvd);
- xsk->outstanding_tx -= rcvd;
- xsk->tx_npkts += rcvd;
- }
-}
-
-static void rx_drop(struct xsk_socket_info *xsk, struct pollfd *fds)
-{
- unsigned int rcvd, i;
- u32 idx_rx = 0, idx_fq = 0;
- int ret;
-
- rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx);
- if (!rcvd) {
- if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
- ret = poll(fds, num_socks, opt_timeout);
- return;
- }
-
- ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
- while (ret != rcvd) {
- if (ret < 0)
- exit_with_error(-ret);
- if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
- ret = poll(fds, num_socks, opt_timeout);
- ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
- }
-
- for (i = 0; i < rcvd; i++) {
- u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr;
- u32 len = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len;
- u64 orig = xsk_umem__extract_addr(addr);
-
- addr = xsk_umem__add_offset_to_addr(addr);
- char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr);
-
- hex_dump(pkt, len, addr);
- *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig;
- }
-
- xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
- xsk_ring_cons__release(&xsk->rx, rcvd);
- xsk->rx_npkts += rcvd;
-}
-
-static void rx_drop_all(void)
-{
- struct pollfd fds[MAX_SOCKS] = {};
- int i, ret;
-
- for (i = 0; i < num_socks; i++) {
- fds[i].fd = xsk_socket__fd(xsks[i]->xsk);
- fds[i].events = POLLIN;
- }
-
- for (;;) {
- if (opt_poll) {
- ret = poll(fds, num_socks, opt_timeout);
- if (ret <= 0)
- continue;
- }
-
- for (i = 0; i < num_socks; i++)
- rx_drop(xsks[i], fds);
-
- if (benchmark_done)
- break;
- }
-}
-
-static void tx_only(struct xsk_socket_info *xsk, u32 frame_nb, int batch_size)
-{
- u32 idx;
- unsigned int i;
-
- while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) <
- batch_size) {
- complete_tx_only(xsk, batch_size);
- }
-
- for (i = 0; i < batch_size; i++) {
- struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx,
- idx + i);
- tx_desc->addr = (frame_nb + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT;
- tx_desc->len = PKT_SIZE;
- }
-
- xsk_ring_prod__submit(&xsk->tx, batch_size);
- xsk->outstanding_tx += batch_size;
- frame_nb += batch_size;
- frame_nb %= NUM_FRAMES;
- complete_tx_only(xsk, batch_size);
-}
-
-static inline int get_batch_size(int pkt_cnt)
-{
- if (!opt_pkt_count)
- return opt_batch_size;
-
- if (pkt_cnt + opt_batch_size <= opt_pkt_count)
- return opt_batch_size;
-
- return opt_pkt_count - pkt_cnt;
-}
-
-static void complete_tx_only_all(void)
-{
- bool pending;
- int i;
-
- do {
- pending = false;
- for (i = 0; i < num_socks; i++) {
- if (xsks[i]->outstanding_tx) {
- complete_tx_only(xsks[i], opt_batch_size);
- pending = !!xsks[i]->outstanding_tx;
- }
- }
- } while (pending);
-}
-
-static void tx_only_all(void)
-{
- struct pollfd fds[MAX_SOCKS] = {};
- u32 frame_nb[MAX_SOCKS] = {};
- int pkt_cnt = 0;
- int i, ret;
-
- for (i = 0; i < num_socks; i++) {
- fds[0].fd = xsk_socket__fd(xsks[i]->xsk);
- fds[0].events = POLLOUT;
- }
-
- while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) {
- int batch_size = get_batch_size(pkt_cnt);
-
- if (opt_poll) {
- ret = poll(fds, num_socks, opt_timeout);
- if (ret <= 0)
- continue;
-
- if (!(fds[0].revents & POLLOUT))
- continue;
- }
-
- for (i = 0; i < num_socks; i++)
- tx_only(xsks[i], frame_nb[i], batch_size);
-
- pkt_cnt += batch_size;
-
- if (benchmark_done)
- break;
- }
-
- if (opt_pkt_count)
- complete_tx_only_all();
-}
-
-static void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds)
-{
- unsigned int rcvd, i;
- u32 idx_rx = 0, idx_tx = 0;
- int ret;
-
- complete_tx_l2fwd(xsk, fds);
-
- rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx);
- if (!rcvd) {
- if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
- ret = poll(fds, num_socks, opt_timeout);
- return;
- }
-
- ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx);
- while (ret != rcvd) {
- if (ret < 0)
- exit_with_error(-ret);
- if (xsk_ring_prod__needs_wakeup(&xsk->tx))
- kick_tx(xsk);
- ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx);
- }
-
- for (i = 0; i < rcvd; i++) {
- u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr;
- u32 len = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len;
- u64 orig = addr;
-
- addr = xsk_umem__add_offset_to_addr(addr);
- char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr);
-
- swap_mac_addresses(pkt);
-
- hex_dump(pkt, len, addr);
- xsk_ring_prod__tx_desc(&xsk->tx, idx_tx)->addr = orig;
- xsk_ring_prod__tx_desc(&xsk->tx, idx_tx++)->len = len;
- }
-
- xsk_ring_prod__submit(&xsk->tx, rcvd);
- xsk_ring_cons__release(&xsk->rx, rcvd);
-
- xsk->rx_npkts += rcvd;
- xsk->outstanding_tx += rcvd;
-}
-
-static void l2fwd_all(void)
-{
- struct pollfd fds[MAX_SOCKS] = {};
- int i, ret;
-
- for (i = 0; i < num_socks; i++) {
- fds[i].fd = xsk_socket__fd(xsks[i]->xsk);
- fds[i].events = POLLOUT | POLLIN;
- }
-
- for (;;) {
- if (opt_poll) {
- ret = poll(fds, num_socks, opt_timeout);
- if (ret <= 0)
- continue;
- }
-
- for (i = 0; i < num_socks; i++)
- l2fwd(xsks[i], fds);
-
- if (benchmark_done)
- break;
- }
-}
-
-static void load_xdp_program(char **argv, struct bpf_object **obj)
-{
- struct bpf_prog_load_attr prog_load_attr = {
- .prog_type = BPF_PROG_TYPE_XDP,
- };
- char xdp_filename[256];
- int prog_fd;
-
- snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv[0]);
- prog_load_attr.file = xdp_filename;
-
- if (bpf_prog_load_xattr(&prog_load_attr, obj, &prog_fd))
- exit(EXIT_FAILURE);
- if (prog_fd < 0) {
- fprintf(stderr, "ERROR: no program found: %s\n",
- strerror(prog_fd));
- exit(EXIT_FAILURE);
- }
-
- if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd, opt_xdp_flags) < 0) {
- fprintf(stderr, "ERROR: link set xdp fd failed\n");
- exit(EXIT_FAILURE);
- }
-}
-
-static void enter_xsks_into_map(struct bpf_object *obj)
-{
- struct bpf_map *map;
- int i, xsks_map;
-
- map = bpf_object__find_map_by_name(obj, "xsks_map");
- xsks_map = bpf_map__fd(map);
- if (xsks_map < 0) {
- fprintf(stderr, "ERROR: no xsks map found: %s\n",
- strerror(xsks_map));
- exit(EXIT_FAILURE);
- }
-
- for (i = 0; i < num_socks; i++) {
- int fd = xsk_socket__fd(xsks[i]->xsk);
- int key, ret;
-
- key = i;
- ret = bpf_map_update_elem(xsks_map, &key, &fd, 0);
- if (ret) {
- fprintf(stderr, "ERROR: bpf_map_update_elem %d\n", i);
- exit(EXIT_FAILURE);
- }
- }
-}
-
-int main(int argc, char **argv)
-{
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
- bool rx = false, tx = false;
- struct xsk_umem_info *umem;
- struct bpf_object *obj;
- pthread_t pt;
- int i, ret;
- void *bufs;
-
- parse_command_line(argc, argv);
-
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- fprintf(stderr, "ERROR: setrlimit(RLIMIT_MEMLOCK) \"%s\"\n",
- strerror(errno));
- exit(EXIT_FAILURE);
- }
-
- if (opt_num_xsks > 1)
- load_xdp_program(argv, &obj);
-
- /* Reserve memory for the umem. Use hugepages if unaligned chunk mode */
- bufs = mmap(NULL, NUM_FRAMES * opt_xsk_frame_size,
- PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS | opt_mmap_flags, -1, 0);
- if (bufs == MAP_FAILED) {
- printf("ERROR: mmap failed\n");
- exit(EXIT_FAILURE);
- }
-
- /* Create sockets... */
- umem = xsk_configure_umem(bufs, NUM_FRAMES * opt_xsk_frame_size);
- if (opt_bench == BENCH_RXDROP || opt_bench == BENCH_L2FWD) {
- rx = true;
- xsk_populate_fill_ring(umem);
- }
- if (opt_bench == BENCH_L2FWD || opt_bench == BENCH_TXONLY)
- tx = true;
- for (i = 0; i < opt_num_xsks; i++)
- xsks[num_socks++] = xsk_configure_socket(umem, rx, tx);
-
- if (opt_bench == BENCH_TXONLY) {
- gen_eth_hdr_data();
-
- for (i = 0; i < NUM_FRAMES; i++)
- gen_eth_frame(umem, i * opt_xsk_frame_size);
- }
-
- if (opt_num_xsks > 1 && opt_bench != BENCH_TXONLY)
- enter_xsks_into_map(obj);
-
- signal(SIGINT, int_exit);
- signal(SIGTERM, int_exit);
- signal(SIGABRT, int_exit);
-
- setlocale(LC_ALL, "");
-
- ret = pthread_create(&pt, NULL, poller, NULL);
- if (ret)
- exit_with_error(ret);
-
- prev_time = get_nsecs();
- start_time = prev_time;
-
- if (opt_bench == BENCH_RXDROP)
- rx_drop_all();
- else if (opt_bench == BENCH_TXONLY)
- tx_only_all();
- else
- l2fwd_all();
-
- benchmark_done = true;
-
- pthread_join(pt, NULL);
-
- xdpsock_cleanup();
-
- return 0;
-}
diff --git a/samples/configfs/configfs_sample.c b/samples/configfs/configfs_sample.c
index e2398d94e8da..37a657b25d58 100644
--- a/samples/configfs/configfs_sample.c
+++ b/samples/configfs/configfs_sample.c
@@ -1,25 +1,21 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * vim: noexpandtab ts=8 sts=0 sw=8:
- *
* configfs_example_macros.c - This file is a demonstration module
* containing a number of configfs subsystems. It uses the helper
* macros defined by configfs.h
*
* Based on sysfs:
- * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel
+ * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel
*
* configfs Copyright (C) 2005 Oracle. All rights reserved.
*/
#include <linux/init.h>
+#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
-
#include <linux/configfs.h>
-
-
/*
* 01-childless
*
@@ -40,8 +36,8 @@ struct childless {
static inline struct childless *to_childless(struct config_item *item)
{
- return item ? container_of(to_configfs_subsystem(to_config_group(item)),
- struct childless, subsys) : NULL;
+ return container_of(to_configfs_subsystem(to_config_group(item)),
+ struct childless, subsys);
}
static ssize_t childless_showme_show(struct config_item *item, char *page)
@@ -64,17 +60,11 @@ static ssize_t childless_storeme_store(struct config_item *item,
const char *page, size_t count)
{
struct childless *childless = to_childless(item);
- unsigned long tmp;
- char *p = (char *) page;
-
- tmp = simple_strtoul(p, &p, 10);
- if (!p || (*p && (*p != '\n')))
- return -EINVAL;
-
- if (tmp > INT_MAX)
- return -ERANGE;
+ int ret;
- childless->storeme = tmp;
+ ret = kstrtoint(page, 10, &childless->storeme);
+ if (ret)
+ return ret;
return count;
}
@@ -117,7 +107,6 @@ static struct childless childless_subsys = {
},
};
-
/* ----------------------------------------------------------------- */
/*
@@ -136,7 +125,7 @@ struct simple_child {
static inline struct simple_child *to_simple_child(struct config_item *item)
{
- return item ? container_of(item, struct simple_child, item) : NULL;
+ return container_of(item, struct simple_child, item);
}
static ssize_t simple_child_storeme_show(struct config_item *item, char *page)
@@ -148,17 +137,11 @@ static ssize_t simple_child_storeme_store(struct config_item *item,
const char *page, size_t count)
{
struct simple_child *simple_child = to_simple_child(item);
- unsigned long tmp;
- char *p = (char *) page;
-
- tmp = simple_strtoul(p, &p, 10);
- if (!p || (*p && (*p != '\n')))
- return -EINVAL;
-
- if (tmp > INT_MAX)
- return -ERANGE;
+ int ret;
- simple_child->storeme = tmp;
+ ret = kstrtoint(page, 10, &simple_child->storeme);
+ if (ret)
+ return ret;
return count;
}
@@ -176,7 +159,7 @@ static void simple_child_release(struct config_item *item)
}
static struct configfs_item_operations simple_child_item_ops = {
- .release = simple_child_release,
+ .release = simple_child_release,
};
static const struct config_item_type simple_child_type = {
@@ -185,15 +168,14 @@ static const struct config_item_type simple_child_type = {
.ct_owner = THIS_MODULE,
};
-
struct simple_children {
struct config_group group;
};
static inline struct simple_children *to_simple_children(struct config_item *item)
{
- return item ? container_of(to_config_group(item),
- struct simple_children, group) : NULL;
+ return container_of(to_config_group(item),
+ struct simple_children, group);
}
static struct config_item *simple_children_make_item(struct config_group *group,
@@ -208,8 +190,6 @@ static struct config_item *simple_children_make_item(struct config_group *group,
config_item_init_type_name(&simple_child->item, name,
&simple_child_type);
- simple_child->storeme = 0;
-
return &simple_child->item;
}
@@ -263,7 +243,6 @@ static struct configfs_subsystem simple_children_subsys = {
},
};
-
/* ----------------------------------------------------------------- */
/*
@@ -350,9 +329,8 @@ static struct configfs_subsystem *example_subsys[] = {
static int __init configfs_example_init(void)
{
- int ret;
- int i;
struct configfs_subsystem *subsys;
+ int ret, i;
for (i = 0; example_subsys[i]; i++) {
subsys = example_subsys[i];
@@ -361,9 +339,8 @@ static int __init configfs_example_init(void)
mutex_init(&subsys->su_mutex);
ret = configfs_register_subsystem(subsys);
if (ret) {
- printk(KERN_ERR "Error %d while registering subsystem %s\n",
- ret,
- subsys->su_group.cg_item.ci_namebuf);
+ pr_err("Error %d while registering subsystem %s\n",
+ ret, subsys->su_group.cg_item.ci_namebuf);
goto out_unregister;
}
}
diff --git a/samples/connector/.gitignore b/samples/connector/.gitignore
index d2b9c32accd4..0e26039f39b5 100644
--- a/samples/connector/.gitignore
+++ b/samples/connector/.gitignore
@@ -1 +1,2 @@
-ucon
+# SPDX-License-Identifier: GPL-2.0-only
+/ucon
diff --git a/samples/connector/Makefile b/samples/connector/Makefile
index b785cbde5ffa..d98a9e047c11 100644
--- a/samples/connector/Makefile
+++ b/samples/connector/Makefile
@@ -1,13 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_SAMPLE_CONNECTOR) += cn_test.o
-# List of programs to build
-hostprogs := ucon
-always-y := $(hostprogs)
+userprogs-always-$(CONFIG_CC_CAN_LINK) += ucon
-HOSTCFLAGS_ucon.o += -I$(objtree)/usr/include
-
-all: modules
-
-modules clean:
- $(MAKE) -C ../.. M=$(CURDIR) $@
+userccflags += -I usr/include
diff --git a/samples/coresight/Makefile b/samples/coresight/Makefile
new file mode 100644
index 000000000000..b3fce4af2347
--- /dev/null
+++ b/samples/coresight/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-$(CONFIG_SAMPLE_CORESIGHT_SYSCFG) += coresight-cfg-sample.o
+ccflags-y += -I$(srctree)/drivers/hwtracing/coresight
diff --git a/samples/coresight/coresight-cfg-sample.c b/samples/coresight/coresight-cfg-sample.c
new file mode 100644
index 000000000000..25485c80b5e3
--- /dev/null
+++ b/samples/coresight/coresight-cfg-sample.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(C) 2020 Linaro Limited. All rights reserved.
+ * Author: Mike Leach <mike.leach@linaro.org>
+ */
+
+#include "coresight-config.h"
+#include "coresight-syscfg.h"
+
+/* create an alternate autofdo configuration */
+
+/* we will provide 4 sets of preset parameter values */
+#define AFDO2_NR_PRESETS 4
+/* the total number of parameters in used features - strobing has 2 */
+#define AFDO2_NR_PARAM_SUM 2
+
+static const char *afdo2_ref_names[] = {
+ "strobing",
+};
+
+/*
+ * set of presets leaves strobing window constant while varying period to allow
+ * experimentation with mark / space ratios for various workloads
+ */
+static u64 afdo2_presets[AFDO2_NR_PRESETS][AFDO2_NR_PARAM_SUM] = {
+ { 1000, 100 },
+ { 1000, 1000 },
+ { 1000, 5000 },
+ { 1000, 10000 },
+};
+
+struct cscfg_config_desc afdo2 = {
+ .name = "autofdo2",
+ .description = "Setup ETMs with strobing for autofdo\n"
+ "Supplied presets allow experimentation with mark-space ratio for various loads\n",
+ .nr_feat_refs = ARRAY_SIZE(afdo2_ref_names),
+ .feat_ref_names = afdo2_ref_names,
+ .nr_presets = AFDO2_NR_PRESETS,
+ .nr_total_params = AFDO2_NR_PARAM_SUM,
+ .presets = &afdo2_presets[0][0],
+};
+
+static struct cscfg_feature_desc *sample_feats[] = {
+ NULL
+};
+
+static struct cscfg_config_desc *sample_cfgs[] = {
+ &afdo2,
+ NULL
+};
+
+static struct cscfg_load_owner_info mod_owner = {
+ .type = CSCFG_OWNER_MODULE,
+ .owner_handle = THIS_MODULE,
+};
+
+/* module init and exit - just load and unload configs */
+static int __init cscfg_sample_init(void)
+{
+ return cscfg_load_config_sets(sample_cfgs, sample_feats, &mod_owner);
+}
+
+static void __exit cscfg_sample_exit(void)
+{
+ cscfg_unload_config_sets(&mod_owner);
+}
+
+module_init(cscfg_sample_init);
+module_exit(cscfg_sample_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Mike Leach <mike.leach@linaro.org>");
+MODULE_DESCRIPTION("CoreSight Syscfg Example");
diff --git a/samples/fanotify/.gitignore b/samples/fanotify/.gitignore
new file mode 100644
index 000000000000..d74593e8b2de
--- /dev/null
+++ b/samples/fanotify/.gitignore
@@ -0,0 +1 @@
+fs-monitor
diff --git a/samples/fanotify/Makefile b/samples/fanotify/Makefile
new file mode 100644
index 000000000000..e20db1bdde3b
--- /dev/null
+++ b/samples/fanotify/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0-only
+userprogs-always-y += fs-monitor
+
+userccflags += -I usr/include -Wall
+
diff --git a/samples/fanotify/fs-monitor.c b/samples/fanotify/fs-monitor.c
new file mode 100644
index 000000000000..608db24c471e
--- /dev/null
+++ b/samples/fanotify/fs-monitor.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2021, Collabora Ltd.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <err.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <sys/fanotify.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#ifndef FAN_FS_ERROR
+#define FAN_FS_ERROR 0x00008000
+#define FAN_EVENT_INFO_TYPE_ERROR 5
+
+struct fanotify_event_info_error {
+ struct fanotify_event_info_header hdr;
+ __s32 error;
+ __u32 error_count;
+};
+#endif
+
+#ifndef FILEID_INO32_GEN
+#define FILEID_INO32_GEN 1
+#endif
+
+#ifndef FILEID_INVALID
+#define FILEID_INVALID 0xff
+#endif
+
+static void print_fh(struct file_handle *fh)
+{
+ int i;
+ uint32_t *h = (uint32_t *) fh->f_handle;
+
+ printf("\tfh: ");
+ for (i = 0; i < fh->handle_bytes; i++)
+ printf("%hhx", fh->f_handle[i]);
+ printf("\n");
+
+ printf("\tdecoded fh: ");
+ if (fh->handle_type == FILEID_INO32_GEN)
+ printf("inode=%u gen=%u\n", h[0], h[1]);
+ else if (fh->handle_type == FILEID_INVALID && !fh->handle_bytes)
+ printf("Type %d (Superblock error)\n", fh->handle_type);
+ else
+ printf("Type %d (Unknown)\n", fh->handle_type);
+
+}
+
+static void handle_notifications(char *buffer, int len)
+{
+ struct fanotify_event_metadata *event =
+ (struct fanotify_event_metadata *) buffer;
+ struct fanotify_event_info_header *info;
+ struct fanotify_event_info_error *err;
+ struct fanotify_event_info_fid *fid;
+ int off;
+
+ for (; FAN_EVENT_OK(event, len); event = FAN_EVENT_NEXT(event, len)) {
+
+ if (event->mask != FAN_FS_ERROR) {
+ printf("unexpected FAN MARK: %llx\n",
+ (unsigned long long)event->mask);
+ goto next_event;
+ }
+
+ if (event->fd != FAN_NOFD) {
+ printf("Unexpected fd (!= FAN_NOFD)\n");
+ goto next_event;
+ }
+
+ printf("FAN_FS_ERROR (len=%d)\n", event->event_len);
+
+ for (off = sizeof(*event) ; off < event->event_len;
+ off += info->len) {
+ info = (struct fanotify_event_info_header *)
+ ((char *) event + off);
+
+ switch (info->info_type) {
+ case FAN_EVENT_INFO_TYPE_ERROR:
+ err = (struct fanotify_event_info_error *) info;
+
+ printf("\tGeneric Error Record: len=%d\n",
+ err->hdr.len);
+ printf("\terror: %d\n", err->error);
+ printf("\terror_count: %d\n", err->error_count);
+ break;
+
+ case FAN_EVENT_INFO_TYPE_FID:
+ fid = (struct fanotify_event_info_fid *) info;
+
+ printf("\tfsid: %x%x\n",
+ fid->fsid.val[0], fid->fsid.val[1]);
+ print_fh((struct file_handle *) &fid->handle);
+ break;
+
+ default:
+ printf("\tUnknown info type=%d len=%d:\n",
+ info->info_type, info->len);
+ }
+ }
+next_event:
+ printf("---\n\n");
+ }
+}
+
+int main(int argc, char **argv)
+{
+ int fd;
+
+ char buffer[BUFSIZ];
+
+ if (argc < 2) {
+ printf("Missing path argument\n");
+ return 1;
+ }
+
+ fd = fanotify_init(FAN_CLASS_NOTIF|FAN_REPORT_FID, O_RDONLY);
+ if (fd < 0)
+ errx(1, "fanotify_init");
+
+ if (fanotify_mark(fd, FAN_MARK_ADD|FAN_MARK_FILESYSTEM,
+ FAN_FS_ERROR, AT_FDCWD, argv[1])) {
+ errx(1, "fanotify_mark");
+ }
+
+ while (1) {
+ int n = read(fd, buffer, BUFSIZ);
+
+ if (n < 0)
+ errx(1, "read");
+
+ handle_notifications(buffer, n);
+ }
+
+ return 0;
+}
diff --git a/samples/fprobe/Makefile b/samples/fprobe/Makefile
new file mode 100644
index 000000000000..ecccbfa6e99b
--- /dev/null
+++ b/samples/fprobe/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-$(CONFIG_SAMPLE_FPROBE) += fprobe_example.o
diff --git a/samples/fprobe/fprobe_example.c b/samples/fprobe/fprobe_example.c
new file mode 100644
index 000000000000..e22da8573116
--- /dev/null
+++ b/samples/fprobe/fprobe_example.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Here's a sample kernel module showing the use of fprobe to dump a
+ * stack trace and selected registers when kernel_clone() is called.
+ *
+ * For more information on theory of operation of kprobes, see
+ * Documentation/trace/kprobes.rst
+ *
+ * You will see the trace data in /var/log/messages and on the console
+ * whenever kernel_clone() is invoked to create a new process.
+ */
+
+#define pr_fmt(fmt) "%s: " fmt, __func__
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/fprobe.h>
+#include <linux/sched/debug.h>
+#include <linux/slab.h>
+
+#define BACKTRACE_DEPTH 16
+#define MAX_SYMBOL_LEN 4096
+static struct fprobe sample_probe;
+static unsigned long nhit;
+
+static char symbol[MAX_SYMBOL_LEN] = "kernel_clone";
+module_param_string(symbol, symbol, sizeof(symbol), 0644);
+MODULE_PARM_DESC(symbol, "Probed symbol(s), given by comma separated symbols or a wildcard pattern.");
+
+static char nosymbol[MAX_SYMBOL_LEN] = "";
+module_param_string(nosymbol, nosymbol, sizeof(nosymbol), 0644);
+MODULE_PARM_DESC(nosymbol, "Not-probed symbols, given by a wildcard pattern.");
+
+static bool stackdump = true;
+module_param(stackdump, bool, 0644);
+MODULE_PARM_DESC(stackdump, "Enable stackdump.");
+
+static bool use_trace = false;
+module_param(use_trace, bool, 0644);
+MODULE_PARM_DESC(use_trace, "Use trace_printk instead of printk. This is only for debugging.");
+
+static void show_backtrace(void)
+{
+ unsigned long stacks[BACKTRACE_DEPTH];
+ unsigned int len;
+
+ len = stack_trace_save(stacks, BACKTRACE_DEPTH, 2);
+ stack_trace_print(stacks, len, 24);
+}
+
+static void sample_entry_handler(struct fprobe *fp, unsigned long ip, struct pt_regs *regs)
+{
+ if (use_trace)
+ /*
+ * This is just an example, no kernel code should call
+ * trace_printk() except when actively debugging.
+ */
+ trace_printk("Enter <%pS> ip = 0x%p\n", (void *)ip, (void *)ip);
+ else
+ pr_info("Enter <%pS> ip = 0x%p\n", (void *)ip, (void *)ip);
+ nhit++;
+ if (stackdump)
+ show_backtrace();
+}
+
+static void sample_exit_handler(struct fprobe *fp, unsigned long ip, struct pt_regs *regs)
+{
+ unsigned long rip = instruction_pointer(regs);
+
+ if (use_trace)
+ /*
+ * This is just an example, no kernel code should call
+ * trace_printk() except when actively debugging.
+ */
+ trace_printk("Return from <%pS> ip = 0x%p to rip = 0x%p (%pS)\n",
+ (void *)ip, (void *)ip, (void *)rip, (void *)rip);
+ else
+ pr_info("Return from <%pS> ip = 0x%p to rip = 0x%p (%pS)\n",
+ (void *)ip, (void *)ip, (void *)rip, (void *)rip);
+ nhit++;
+ if (stackdump)
+ show_backtrace();
+}
+
+static int __init fprobe_init(void)
+{
+ char *p, *symbuf = NULL;
+ const char **syms;
+ int ret, count, i;
+
+ sample_probe.entry_handler = sample_entry_handler;
+ sample_probe.exit_handler = sample_exit_handler;
+
+ if (strchr(symbol, '*')) {
+ /* filter based fprobe */
+ ret = register_fprobe(&sample_probe, symbol,
+ nosymbol[0] == '\0' ? NULL : nosymbol);
+ goto out;
+ } else if (!strchr(symbol, ',')) {
+ symbuf = symbol;
+ ret = register_fprobe_syms(&sample_probe, (const char **)&symbuf, 1);
+ goto out;
+ }
+
+ /* Comma separated symbols */
+ symbuf = kstrdup(symbol, GFP_KERNEL);
+ if (!symbuf)
+ return -ENOMEM;
+ p = symbuf;
+ count = 1;
+ while ((p = strchr(++p, ',')) != NULL)
+ count++;
+
+ pr_info("%d symbols found\n", count);
+
+ syms = kcalloc(count, sizeof(char *), GFP_KERNEL);
+ if (!syms) {
+ kfree(symbuf);
+ return -ENOMEM;
+ }
+
+ p = symbuf;
+ for (i = 0; i < count; i++)
+ syms[i] = strsep(&p, ",");
+
+ ret = register_fprobe_syms(&sample_probe, syms, count);
+ kfree(syms);
+ kfree(symbuf);
+out:
+ if (ret < 0)
+ pr_err("register_fprobe failed, returned %d\n", ret);
+ else
+ pr_info("Planted fprobe at %s\n", symbol);
+
+ return ret;
+}
+
+static void __exit fprobe_exit(void)
+{
+ unregister_fprobe(&sample_probe);
+
+ pr_info("fprobe at %s unregistered. %ld times hit, %ld times missed\n",
+ symbol, nhit, sample_probe.nmissed);
+}
+
+module_init(fprobe_init)
+module_exit(fprobe_exit)
+MODULE_LICENSE("GPL");
diff --git a/samples/ftrace/Makefile b/samples/ftrace/Makefile
index 4ce896e10b2e..faf8cdb79c5f 100644
--- a/samples/ftrace/Makefile
+++ b/samples/ftrace/Makefile
@@ -3,6 +3,8 @@
obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace-direct.o
obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace-direct-too.o
obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace-direct-modify.o
+obj-$(CONFIG_SAMPLE_FTRACE_DIRECT_MULTI) += ftrace-direct-multi.o
+obj-$(CONFIG_SAMPLE_FTRACE_DIRECT_MULTI) += ftrace-direct-multi-modify.o
CFLAGS_sample-trace-array.o := -I$(src)
obj-$(CONFIG_SAMPLE_TRACE_ARRAY) += sample-trace-array.o
diff --git a/samples/ftrace/ftrace-direct-modify.c b/samples/ftrace/ftrace-direct-modify.c
index e04229d21475..39146fa83e20 100644
--- a/samples/ftrace/ftrace-direct-modify.c
+++ b/samples/ftrace/ftrace-direct-modify.c
@@ -2,6 +2,10 @@
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/ftrace.h>
+#include <asm/asm-offsets.h>
+
+extern void my_direct_func1(void);
+extern void my_direct_func2(void);
void my_direct_func1(void)
{
@@ -18,23 +22,77 @@ extern void my_tramp2(void *);
static unsigned long my_ip = (unsigned long)schedule;
+#ifdef CONFIG_X86_64
+
+#include <asm/ibt.h>
+
asm (
" .pushsection .text, \"ax\", @progbits\n"
+" .type my_tramp1, @function\n"
+" .globl my_tramp1\n"
" my_tramp1:"
+ ASM_ENDBR
" pushq %rbp\n"
" movq %rsp, %rbp\n"
" call my_direct_func1\n"
" leave\n"
-" ret\n"
+" .size my_tramp1, .-my_tramp1\n"
+ ASM_RET
+
+" .type my_tramp2, @function\n"
+" .globl my_tramp2\n"
" my_tramp2:"
+ ASM_ENDBR
" pushq %rbp\n"
" movq %rsp, %rbp\n"
" call my_direct_func2\n"
" leave\n"
-" ret\n"
+ ASM_RET
+" .size my_tramp2, .-my_tramp2\n"
" .popsection\n"
);
+#endif /* CONFIG_X86_64 */
+
+#ifdef CONFIG_S390
+
+asm (
+" .pushsection .text, \"ax\", @progbits\n"
+" .type my_tramp1, @function\n"
+" .globl my_tramp1\n"
+" my_tramp1:"
+" lgr %r1,%r15\n"
+" stmg %r0,%r5,"__stringify(__SF_GPRS)"(%r15)\n"
+" stg %r14,"__stringify(__SF_GPRS+8*8)"(%r15)\n"
+" aghi %r15,"__stringify(-STACK_FRAME_OVERHEAD)"\n"
+" stg %r1,"__stringify(__SF_BACKCHAIN)"(%r15)\n"
+" brasl %r14,my_direct_func1\n"
+" aghi %r15,"__stringify(STACK_FRAME_OVERHEAD)"\n"
+" lmg %r0,%r5,"__stringify(__SF_GPRS)"(%r15)\n"
+" lg %r14,"__stringify(__SF_GPRS+8*8)"(%r15)\n"
+" lgr %r1,%r0\n"
+" br %r1\n"
+" .size my_tramp1, .-my_tramp1\n"
+" .type my_tramp2, @function\n"
+" .globl my_tramp2\n"
+" my_tramp2:"
+" lgr %r1,%r15\n"
+" stmg %r0,%r5,"__stringify(__SF_GPRS)"(%r15)\n"
+" stg %r14,"__stringify(__SF_GPRS+8*8)"(%r15)\n"
+" aghi %r15,"__stringify(-STACK_FRAME_OVERHEAD)"\n"
+" stg %r1,"__stringify(__SF_BACKCHAIN)"(%r15)\n"
+" brasl %r14,my_direct_func2\n"
+" aghi %r15,"__stringify(STACK_FRAME_OVERHEAD)"\n"
+" lmg %r0,%r5,"__stringify(__SF_GPRS)"(%r15)\n"
+" lg %r14,"__stringify(__SF_GPRS+8*8)"(%r15)\n"
+" lgr %r1,%r0\n"
+" br %r1\n"
+" .size my_tramp2, .-my_tramp2\n"
+" .popsection\n"
+);
+
+#endif /* CONFIG_S390 */
+
static unsigned long my_tramp = (unsigned long)my_tramp1;
static unsigned long tramps[2] = {
(unsigned long)my_tramp1,
diff --git a/samples/ftrace/ftrace-direct-multi-modify.c b/samples/ftrace/ftrace-direct-multi-modify.c
new file mode 100644
index 000000000000..65aa94d96f4e
--- /dev/null
+++ b/samples/ftrace/ftrace-direct-multi-modify.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/ftrace.h>
+#include <asm/asm-offsets.h>
+
+extern void my_direct_func1(unsigned long ip);
+extern void my_direct_func2(unsigned long ip);
+
+void my_direct_func1(unsigned long ip)
+{
+ trace_printk("my direct func1 ip %lx\n", ip);
+}
+
+void my_direct_func2(unsigned long ip)
+{
+ trace_printk("my direct func2 ip %lx\n", ip);
+}
+
+extern void my_tramp1(void *);
+extern void my_tramp2(void *);
+
+#ifdef CONFIG_X86_64
+
+#include <asm/ibt.h>
+
+asm (
+" .pushsection .text, \"ax\", @progbits\n"
+" .type my_tramp1, @function\n"
+" .globl my_tramp1\n"
+" my_tramp1:"
+ ASM_ENDBR
+" pushq %rbp\n"
+" movq %rsp, %rbp\n"
+" pushq %rdi\n"
+" movq 8(%rbp), %rdi\n"
+" call my_direct_func1\n"
+" popq %rdi\n"
+" leave\n"
+ ASM_RET
+" .size my_tramp1, .-my_tramp1\n"
+
+" .type my_tramp2, @function\n"
+" .globl my_tramp2\n"
+" my_tramp2:"
+ ASM_ENDBR
+" pushq %rbp\n"
+" movq %rsp, %rbp\n"
+" pushq %rdi\n"
+" movq 8(%rbp), %rdi\n"
+" call my_direct_func2\n"
+" popq %rdi\n"
+" leave\n"
+ ASM_RET
+" .size my_tramp2, .-my_tramp2\n"
+" .popsection\n"
+);
+
+#endif /* CONFIG_X86_64 */
+
+#ifdef CONFIG_S390
+
+asm (
+" .pushsection .text, \"ax\", @progbits\n"
+" .type my_tramp1, @function\n"
+" .globl my_tramp1\n"
+" my_tramp1:"
+" lgr %r1,%r15\n"
+" stmg %r0,%r5,"__stringify(__SF_GPRS)"(%r15)\n"
+" stg %r14,"__stringify(__SF_GPRS+8*8)"(%r15)\n"
+" aghi %r15,"__stringify(-STACK_FRAME_OVERHEAD)"\n"
+" stg %r1,"__stringify(__SF_BACKCHAIN)"(%r15)\n"
+" lgr %r2,%r0\n"
+" brasl %r14,my_direct_func1\n"
+" aghi %r15,"__stringify(STACK_FRAME_OVERHEAD)"\n"
+" lmg %r0,%r5,"__stringify(__SF_GPRS)"(%r15)\n"
+" lg %r14,"__stringify(__SF_GPRS+8*8)"(%r15)\n"
+" lgr %r1,%r0\n"
+" br %r1\n"
+" .size my_tramp1, .-my_tramp1\n"
+"\n"
+" .type my_tramp2, @function\n"
+" .globl my_tramp2\n"
+" my_tramp2:"
+" lgr %r1,%r15\n"
+" stmg %r0,%r5,"__stringify(__SF_GPRS)"(%r15)\n"
+" stg %r14,"__stringify(__SF_GPRS+8*8)"(%r15)\n"
+" aghi %r15,"__stringify(-STACK_FRAME_OVERHEAD)"\n"
+" stg %r1,"__stringify(__SF_BACKCHAIN)"(%r15)\n"
+" lgr %r2,%r0\n"
+" brasl %r14,my_direct_func2\n"
+" aghi %r15,"__stringify(STACK_FRAME_OVERHEAD)"\n"
+" lmg %r0,%r5,"__stringify(__SF_GPRS)"(%r15)\n"
+" lg %r14,"__stringify(__SF_GPRS+8*8)"(%r15)\n"
+" lgr %r1,%r0\n"
+" br %r1\n"
+" .size my_tramp2, .-my_tramp2\n"
+" .popsection\n"
+);
+
+#endif /* CONFIG_S390 */
+
+static unsigned long my_tramp = (unsigned long)my_tramp1;
+static unsigned long tramps[2] = {
+ (unsigned long)my_tramp1,
+ (unsigned long)my_tramp2,
+};
+
+static struct ftrace_ops direct;
+
+static int simple_thread(void *arg)
+{
+ static int t;
+ int ret = 0;
+
+ while (!kthread_should_stop()) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(2 * HZ);
+
+ if (ret)
+ continue;
+ t ^= 1;
+ ret = modify_ftrace_direct_multi(&direct, tramps[t]);
+ if (!ret)
+ my_tramp = tramps[t];
+ WARN_ON_ONCE(ret);
+ }
+
+ return 0;
+}
+
+static struct task_struct *simple_tsk;
+
+static int __init ftrace_direct_multi_init(void)
+{
+ int ret;
+
+ ftrace_set_filter_ip(&direct, (unsigned long) wake_up_process, 0, 0);
+ ftrace_set_filter_ip(&direct, (unsigned long) schedule, 0, 0);
+
+ ret = register_ftrace_direct_multi(&direct, my_tramp);
+
+ if (!ret)
+ simple_tsk = kthread_run(simple_thread, NULL, "event-sample-fn");
+ return ret;
+}
+
+static void __exit ftrace_direct_multi_exit(void)
+{
+ kthread_stop(simple_tsk);
+ unregister_ftrace_direct_multi(&direct, my_tramp);
+}
+
+module_init(ftrace_direct_multi_init);
+module_exit(ftrace_direct_multi_exit);
+
+MODULE_AUTHOR("Jiri Olsa");
+MODULE_DESCRIPTION("Example use case of using modify_ftrace_direct_multi()");
+MODULE_LICENSE("GPL");
diff --git a/samples/ftrace/ftrace-direct-multi.c b/samples/ftrace/ftrace-direct-multi.c
new file mode 100644
index 000000000000..41ded7c615c7
--- /dev/null
+++ b/samples/ftrace/ftrace-direct-multi.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/module.h>
+
+#include <linux/mm.h> /* for handle_mm_fault() */
+#include <linux/ftrace.h>
+#include <linux/sched/stat.h>
+#include <asm/asm-offsets.h>
+
+extern void my_direct_func(unsigned long ip);
+
+void my_direct_func(unsigned long ip)
+{
+ trace_printk("ip %lx\n", ip);
+}
+
+extern void my_tramp(void *);
+
+#ifdef CONFIG_X86_64
+
+#include <asm/ibt.h>
+
+asm (
+" .pushsection .text, \"ax\", @progbits\n"
+" .type my_tramp, @function\n"
+" .globl my_tramp\n"
+" my_tramp:"
+ ASM_ENDBR
+" pushq %rbp\n"
+" movq %rsp, %rbp\n"
+" pushq %rdi\n"
+" movq 8(%rbp), %rdi\n"
+" call my_direct_func\n"
+" popq %rdi\n"
+" leave\n"
+ ASM_RET
+" .size my_tramp, .-my_tramp\n"
+" .popsection\n"
+);
+
+#endif /* CONFIG_X86_64 */
+
+#ifdef CONFIG_S390
+
+asm (
+" .pushsection .text, \"ax\", @progbits\n"
+" .type my_tramp, @function\n"
+" .globl my_tramp\n"
+" my_tramp:"
+" lgr %r1,%r15\n"
+" stmg %r0,%r5,"__stringify(__SF_GPRS)"(%r15)\n"
+" stg %r14,"__stringify(__SF_GPRS+8*8)"(%r15)\n"
+" aghi %r15,"__stringify(-STACK_FRAME_OVERHEAD)"\n"
+" stg %r1,"__stringify(__SF_BACKCHAIN)"(%r15)\n"
+" lgr %r2,%r0\n"
+" brasl %r14,my_direct_func\n"
+" aghi %r15,"__stringify(STACK_FRAME_OVERHEAD)"\n"
+" lmg %r0,%r5,"__stringify(__SF_GPRS)"(%r15)\n"
+" lg %r14,"__stringify(__SF_GPRS+8*8)"(%r15)\n"
+" lgr %r1,%r0\n"
+" br %r1\n"
+" .size my_tramp, .-my_tramp\n"
+" .popsection\n"
+);
+
+#endif /* CONFIG_S390 */
+
+static struct ftrace_ops direct;
+
+static int __init ftrace_direct_multi_init(void)
+{
+ ftrace_set_filter_ip(&direct, (unsigned long) wake_up_process, 0, 0);
+ ftrace_set_filter_ip(&direct, (unsigned long) schedule, 0, 0);
+
+ return register_ftrace_direct_multi(&direct, (unsigned long) my_tramp);
+}
+
+static void __exit ftrace_direct_multi_exit(void)
+{
+ unregister_ftrace_direct_multi(&direct, (unsigned long) my_tramp);
+}
+
+module_init(ftrace_direct_multi_init);
+module_exit(ftrace_direct_multi_exit);
+
+MODULE_AUTHOR("Jiri Olsa");
+MODULE_DESCRIPTION("Example use case of using register_ftrace_direct_multi()");
+MODULE_LICENSE("GPL");
diff --git a/samples/ftrace/ftrace-direct-too.c b/samples/ftrace/ftrace-direct-too.c
index 27efa5f6ff52..6690468c5cc2 100644
--- a/samples/ftrace/ftrace-direct-too.c
+++ b/samples/ftrace/ftrace-direct-too.c
@@ -3,6 +3,10 @@
#include <linux/mm.h> /* for handle_mm_fault() */
#include <linux/ftrace.h>
+#include <asm/asm-offsets.h>
+
+extern void my_direct_func(struct vm_area_struct *vma,
+ unsigned long address, unsigned int flags);
void my_direct_func(struct vm_area_struct *vma,
unsigned long address, unsigned int flags)
@@ -13,9 +17,16 @@ void my_direct_func(struct vm_area_struct *vma,
extern void my_tramp(void *);
+#ifdef CONFIG_X86_64
+
+#include <asm/ibt.h>
+
asm (
" .pushsection .text, \"ax\", @progbits\n"
+" .type my_tramp, @function\n"
+" .globl my_tramp\n"
" my_tramp:"
+ ASM_ENDBR
" pushq %rbp\n"
" movq %rsp, %rbp\n"
" pushq %rdi\n"
@@ -26,10 +37,36 @@ asm (
" popq %rsi\n"
" popq %rdi\n"
" leave\n"
-" ret\n"
+ ASM_RET
+" .size my_tramp, .-my_tramp\n"
+" .popsection\n"
+);
+
+#endif /* CONFIG_X86_64 */
+
+#ifdef CONFIG_S390
+
+asm (
+" .pushsection .text, \"ax\", @progbits\n"
+" .type my_tramp, @function\n"
+" .globl my_tramp\n"
+" my_tramp:"
+" lgr %r1,%r15\n"
+" stmg %r0,%r5,"__stringify(__SF_GPRS)"(%r15)\n"
+" stg %r14,"__stringify(__SF_GPRS+8*8)"(%r15)\n"
+" aghi %r15,"__stringify(-STACK_FRAME_OVERHEAD)"\n"
+" stg %r1,"__stringify(__SF_BACKCHAIN)"(%r15)\n"
+" brasl %r14,my_direct_func\n"
+" aghi %r15,"__stringify(STACK_FRAME_OVERHEAD)"\n"
+" lmg %r0,%r5,"__stringify(__SF_GPRS)"(%r15)\n"
+" lg %r14,"__stringify(__SF_GPRS+8*8)"(%r15)\n"
+" lgr %r1,%r0\n"
+" br %r1\n"
+" .size my_tramp, .-my_tramp\n"
" .popsection\n"
);
+#endif /* CONFIG_S390 */
static int __init ftrace_direct_init(void)
{
diff --git a/samples/ftrace/ftrace-direct.c b/samples/ftrace/ftrace-direct.c
index a2e3063bd306..e8f1e440b9b8 100644
--- a/samples/ftrace/ftrace-direct.c
+++ b/samples/ftrace/ftrace-direct.c
@@ -3,6 +3,9 @@
#include <linux/sched.h> /* for wake_up_process() */
#include <linux/ftrace.h>
+#include <asm/asm-offsets.h>
+
+extern void my_direct_func(struct task_struct *p);
void my_direct_func(struct task_struct *p)
{
@@ -11,19 +14,52 @@ void my_direct_func(struct task_struct *p)
extern void my_tramp(void *);
+#ifdef CONFIG_X86_64
+
+#include <asm/ibt.h>
+
asm (
" .pushsection .text, \"ax\", @progbits\n"
+" .type my_tramp, @function\n"
+" .globl my_tramp\n"
" my_tramp:"
+ ASM_ENDBR
" pushq %rbp\n"
" movq %rsp, %rbp\n"
" pushq %rdi\n"
" call my_direct_func\n"
" popq %rdi\n"
" leave\n"
-" ret\n"
+ ASM_RET
+" .size my_tramp, .-my_tramp\n"
+" .popsection\n"
+);
+
+#endif /* CONFIG_X86_64 */
+
+#ifdef CONFIG_S390
+
+asm (
+" .pushsection .text, \"ax\", @progbits\n"
+" .type my_tramp, @function\n"
+" .globl my_tramp\n"
+" my_tramp:"
+" lgr %r1,%r15\n"
+" stmg %r0,%r5,"__stringify(__SF_GPRS)"(%r15)\n"
+" stg %r14,"__stringify(__SF_GPRS+8*8)"(%r15)\n"
+" aghi %r15,"__stringify(-STACK_FRAME_OVERHEAD)"\n"
+" stg %r1,"__stringify(__SF_BACKCHAIN)"(%r15)\n"
+" brasl %r14,my_direct_func\n"
+" aghi %r15,"__stringify(STACK_FRAME_OVERHEAD)"\n"
+" lmg %r0,%r5,"__stringify(__SF_GPRS)"(%r15)\n"
+" lg %r14,"__stringify(__SF_GPRS+8*8)"(%r15)\n"
+" lgr %r1,%r0\n"
+" br %r1\n"
+" .size my_tramp, .-my_tramp\n"
" .popsection\n"
);
+#endif /* CONFIG_S390 */
static int __init ftrace_direct_init(void)
{
diff --git a/samples/ftrace/sample-trace-array.c b/samples/ftrace/sample-trace-array.c
index d523450d73eb..6aba02a31c96 100644
--- a/samples/ftrace/sample-trace-array.c
+++ b/samples/ftrace/sample-trace-array.c
@@ -6,6 +6,7 @@
#include <linux/timer.h>
#include <linux/err.h>
#include <linux/jiffies.h>
+#include <linux/workqueue.h>
/*
* Any file that uses trace points, must include the header.
@@ -20,6 +21,16 @@ struct trace_array *tr;
static void mytimer_handler(struct timer_list *unused);
static struct task_struct *simple_tsk;
+static void trace_work_fn(struct work_struct *work)
+{
+ /*
+ * Disable tracing for event "sample_event".
+ */
+ trace_array_set_clr_event(tr, "sample-subsystem", "sample_event",
+ false);
+}
+static DECLARE_WORK(trace_work, trace_work_fn);
+
/*
* mytimer: Timer setup to disable tracing for event "sample_event". This
* timer is only for the purposes of the sample module to demonstrate access of
@@ -29,11 +40,7 @@ static DEFINE_TIMER(mytimer, mytimer_handler);
static void mytimer_handler(struct timer_list *unused)
{
- /*
- * Disable tracing for event "sample_event".
- */
- trace_array_set_clr_event(tr, "sample-subsystem", "sample_event",
- false);
+ schedule_work(&trace_work);
}
static void simple_thread_func(int count)
@@ -76,6 +83,7 @@ static int simple_thread(void *arg)
simple_thread_func(count++);
del_timer(&mytimer);
+ cancel_work_sync(&trace_work);
/*
* trace_array_put() decrements the reference counter associated with
@@ -107,8 +115,12 @@ static int __init sample_trace_array_init(void)
trace_printk_init_buffers();
simple_tsk = kthread_run(simple_thread, NULL, "sample-instance");
- if (IS_ERR(simple_tsk))
+ if (IS_ERR(simple_tsk)) {
+ trace_array_put(tr);
+ trace_array_destroy(tr);
return -1;
+ }
+
return 0;
}
diff --git a/samples/hidraw/.gitignore b/samples/hidraw/.gitignore
index 05e51a685242..5233ab63262e 100644
--- a/samples/hidraw/.gitignore
+++ b/samples/hidraw/.gitignore
@@ -1 +1,2 @@
-hid-example
+# SPDX-License-Identifier: GPL-2.0-only
+/hid-example
diff --git a/samples/hidraw/Makefile b/samples/hidraw/Makefile
index 8bd25f77671f..594d989e5486 100644
--- a/samples/hidraw/Makefile
+++ b/samples/hidraw/Makefile
@@ -1,8 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
-# List of programs to build
-hostprogs := hid-example
-always-y := $(hostprogs)
+userprogs-always-y += hid-example
-HOSTCFLAGS_hid-example.o += -I$(objtree)/usr/include
-
-all: hid-example
+userccflags += -I usr/include
diff --git a/samples/hidraw/hid-example.c b/samples/hidraw/hid-example.c
index 37a0ffcb4d63..0f73ace3c6c3 100644
--- a/samples/hidraw/hid-example.c
+++ b/samples/hidraw/hid-example.c
@@ -128,7 +128,7 @@ int main(int argc, char **argv)
perror("HIDIOCGFEATURE");
} else {
printf("ioctl HIDIOCGFEATURE returned: %d\n", res);
- printf("Report data (not containing the report number):\n\t");
+ printf("Report data:\n\t");
for (i = 0; i < res; i++)
printf("%hhx ", buf[i]);
puts("\n");
diff --git a/samples/hw_breakpoint/data_breakpoint.c b/samples/hw_breakpoint/data_breakpoint.c
index c58504774118..418c46fe5ffc 100644
--- a/samples/hw_breakpoint/data_breakpoint.c
+++ b/samples/hw_breakpoint/data_breakpoint.c
@@ -23,7 +23,7 @@
struct perf_event * __percpu *sample_hbp;
-static char ksym_name[KSYM_NAME_LEN] = "pid_max";
+static char ksym_name[KSYM_NAME_LEN] = "jiffies";
module_param_string(ksym, ksym_name, KSYM_NAME_LEN, S_IRUGO);
MODULE_PARM_DESC(ksym, "Kernel symbol to monitor; this module will report any"
" write operations on the kernel symbol");
@@ -41,11 +41,15 @@ static int __init hw_break_module_init(void)
{
int ret;
struct perf_event_attr attr;
+ void *addr = __symbol_get(ksym_name);
+
+ if (!addr)
+ return -ENXIO;
hw_breakpoint_init(&attr);
- attr.bp_addr = kallsyms_lookup_name(ksym_name);
+ attr.bp_addr = (unsigned long)addr;
attr.bp_len = HW_BREAKPOINT_LEN_4;
- attr.bp_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
+ attr.bp_type = HW_BREAKPOINT_W;
sample_hbp = register_wide_hw_breakpoint(&attr, sample_hbp_handler, NULL);
if (IS_ERR((void __force *)sample_hbp)) {
@@ -66,6 +70,7 @@ fail:
static void __exit hw_break_module_exit(void)
{
unregister_wide_hw_breakpoint(sample_hbp);
+ symbol_put(ksym_name);
printk(KERN_INFO "HW Breakpoint for %s write uninstalled\n", ksym_name);
}
diff --git a/samples/kdb/kdb_hello.c b/samples/kdb/kdb_hello.c
index c1c2fa0f62c2..82736e5a5e32 100644
--- a/samples/kdb/kdb_hello.c
+++ b/samples/kdb/kdb_hello.c
@@ -28,28 +28,26 @@ static int kdb_hello_cmd(int argc, const char **argv)
return 0;
}
+static kdbtab_t hello_cmd = {
+ .name = "hello",
+ .func = kdb_hello_cmd,
+ .usage = "[string]",
+ .help = "Say Hello World or Hello [string]",
+};
static int __init kdb_hello_cmd_init(void)
{
/*
* Registration of a dynamically added kdb command is done with
- * kdb_register() with the arguments being:
- * 1: The name of the shell command
- * 2: The function that processes the command
- * 3: Description of the usage of any arguments
- * 4: Descriptive text when you run help
- * 5: Number of characters to complete the command
- * 0 == type the whole command
- * 1 == match both "g" and "go" for example
+ * kdb_register().
*/
- kdb_register("hello", kdb_hello_cmd, "[string]",
- "Say Hello World or Hello [string]", 0);
+ kdb_register(&hello_cmd);
return 0;
}
static void __exit kdb_hello_cmd_exit(void)
{
- kdb_unregister("hello");
+ kdb_unregister(&hello_cmd);
}
module_init(kdb_hello_cmd_init);
diff --git a/samples/kfifo/bytestream-example.c b/samples/kfifo/bytestream-example.c
index c406f03ee551..642d0748c169 100644
--- a/samples/kfifo/bytestream-example.c
+++ b/samples/kfifo/bytestream-example.c
@@ -22,10 +22,10 @@
#define PROC_FIFO "bytestream-fifo"
/* lock for procfs read access */
-static DEFINE_MUTEX(read_lock);
+static DEFINE_MUTEX(read_access);
/* lock for procfs write access */
-static DEFINE_MUTEX(write_lock);
+static DEFINE_MUTEX(write_access);
/*
* define DYNAMIC in this example for a dynamically allocated fifo.
@@ -116,14 +116,16 @@ static ssize_t fifo_write(struct file *file, const char __user *buf,
int ret;
unsigned int copied;
- if (mutex_lock_interruptible(&write_lock))
+ if (mutex_lock_interruptible(&write_access))
return -ERESTARTSYS;
ret = kfifo_from_user(&test, buf, count, &copied);
- mutex_unlock(&write_lock);
+ mutex_unlock(&write_access);
+ if (ret)
+ return ret;
- return ret ? ret : copied;
+ return copied;
}
static ssize_t fifo_read(struct file *file, char __user *buf,
@@ -132,14 +134,16 @@ static ssize_t fifo_read(struct file *file, char __user *buf,
int ret;
unsigned int copied;
- if (mutex_lock_interruptible(&read_lock))
+ if (mutex_lock_interruptible(&read_access))
return -ERESTARTSYS;
ret = kfifo_to_user(&test, buf, count, &copied);
- mutex_unlock(&read_lock);
+ mutex_unlock(&read_access);
+ if (ret)
+ return ret;
- return ret ? ret : copied;
+ return copied;
}
static const struct proc_ops fifo_proc_ops = {
diff --git a/samples/kfifo/inttype-example.c b/samples/kfifo/inttype-example.c
index 78977fc4a23f..c61482ba94f4 100644
--- a/samples/kfifo/inttype-example.c
+++ b/samples/kfifo/inttype-example.c
@@ -22,10 +22,10 @@
#define PROC_FIFO "int-fifo"
/* lock for procfs read access */
-static DEFINE_MUTEX(read_lock);
+static DEFINE_MUTEX(read_access);
/* lock for procfs write access */
-static DEFINE_MUTEX(write_lock);
+static DEFINE_MUTEX(write_access);
/*
* define DYNAMIC in this example for a dynamically allocated fifo.
@@ -109,14 +109,16 @@ static ssize_t fifo_write(struct file *file, const char __user *buf,
int ret;
unsigned int copied;
- if (mutex_lock_interruptible(&write_lock))
+ if (mutex_lock_interruptible(&write_access))
return -ERESTARTSYS;
ret = kfifo_from_user(&test, buf, count, &copied);
- mutex_unlock(&write_lock);
+ mutex_unlock(&write_access);
+ if (ret)
+ return ret;
- return ret ? ret : copied;
+ return copied;
}
static ssize_t fifo_read(struct file *file, char __user *buf,
@@ -125,14 +127,16 @@ static ssize_t fifo_read(struct file *file, char __user *buf,
int ret;
unsigned int copied;
- if (mutex_lock_interruptible(&read_lock))
+ if (mutex_lock_interruptible(&read_access))
return -ERESTARTSYS;
ret = kfifo_to_user(&test, buf, count, &copied);
- mutex_unlock(&read_lock);
+ mutex_unlock(&read_access);
+ if (ret)
+ return ret;
- return ret ? ret : copied;
+ return copied;
}
static const struct proc_ops fifo_proc_ops = {
diff --git a/samples/kfifo/record-example.c b/samples/kfifo/record-example.c
index c507998a2617..e4087b2d3fc4 100644
--- a/samples/kfifo/record-example.c
+++ b/samples/kfifo/record-example.c
@@ -22,10 +22,10 @@
#define PROC_FIFO "record-fifo"
/* lock for procfs read access */
-static DEFINE_MUTEX(read_lock);
+static DEFINE_MUTEX(read_access);
/* lock for procfs write access */
-static DEFINE_MUTEX(write_lock);
+static DEFINE_MUTEX(write_access);
/*
* define DYNAMIC in this example for a dynamically allocated fifo.
@@ -123,14 +123,16 @@ static ssize_t fifo_write(struct file *file, const char __user *buf,
int ret;
unsigned int copied;
- if (mutex_lock_interruptible(&write_lock))
+ if (mutex_lock_interruptible(&write_access))
return -ERESTARTSYS;
ret = kfifo_from_user(&test, buf, count, &copied);
- mutex_unlock(&write_lock);
+ mutex_unlock(&write_access);
+ if (ret)
+ return ret;
- return ret ? ret : copied;
+ return copied;
}
static ssize_t fifo_read(struct file *file, char __user *buf,
@@ -139,14 +141,16 @@ static ssize_t fifo_read(struct file *file, char __user *buf,
int ret;
unsigned int copied;
- if (mutex_lock_interruptible(&read_lock))
+ if (mutex_lock_interruptible(&read_access))
return -ERESTARTSYS;
ret = kfifo_to_user(&test, buf, count, &copied);
- mutex_unlock(&read_lock);
+ mutex_unlock(&read_access);
+ if (ret)
+ return ret;
- return ret ? ret : copied;
+ return copied;
}
static const struct proc_ops fifo_proc_ops = {
diff --git a/samples/kmemleak/Makefile b/samples/kmemleak/Makefile
new file mode 100644
index 000000000000..16b6132c540c
--- /dev/null
+++ b/samples/kmemleak/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o
diff --git a/samples/kmemleak/kmemleak-test.c b/samples/kmemleak/kmemleak-test.c
new file mode 100644
index 000000000000..7b476eb8285f
--- /dev/null
+++ b/samples/kmemleak/kmemleak-test.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * samples/kmemleak/kmemleak-test.c
+ *
+ * Copyright (C) 2008 ARM Limited
+ * Written by Catalin Marinas <catalin.marinas@arm.com>
+ */
+
+#define pr_fmt(fmt) "kmemleak: " fmt
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/list.h>
+#include <linux/percpu.h>
+#include <linux/fdtable.h>
+
+#include <linux/kmemleak.h>
+
+struct test_node {
+ long header[25];
+ struct list_head list;
+ long footer[25];
+};
+
+static LIST_HEAD(test_list);
+static DEFINE_PER_CPU(void *, kmemleak_test_pointer);
+
+/*
+ * Some very simple testing. This function needs to be extended for
+ * proper testing.
+ */
+static int __init kmemleak_test_init(void)
+{
+ struct test_node *elem;
+ int i;
+
+ pr_info("Kmemleak testing\n");
+
+ /* make some orphan objects */
+ pr_info("kmalloc(32) = %p\n", kmalloc(32, GFP_KERNEL));
+ pr_info("kmalloc(32) = %p\n", kmalloc(32, GFP_KERNEL));
+ pr_info("kmalloc(1024) = %p\n", kmalloc(1024, GFP_KERNEL));
+ pr_info("kmalloc(1024) = %p\n", kmalloc(1024, GFP_KERNEL));
+ pr_info("kmalloc(2048) = %p\n", kmalloc(2048, GFP_KERNEL));
+ pr_info("kmalloc(2048) = %p\n", kmalloc(2048, GFP_KERNEL));
+ pr_info("kmalloc(4096) = %p\n", kmalloc(4096, GFP_KERNEL));
+ pr_info("kmalloc(4096) = %p\n", kmalloc(4096, GFP_KERNEL));
+#ifndef CONFIG_MODULES
+ pr_info("kmem_cache_alloc(files_cachep) = %p\n",
+ kmem_cache_alloc(files_cachep, GFP_KERNEL));
+ pr_info("kmem_cache_alloc(files_cachep) = %p\n",
+ kmem_cache_alloc(files_cachep, GFP_KERNEL));
+#endif
+ pr_info("vmalloc(64) = %p\n", vmalloc(64));
+ pr_info("vmalloc(64) = %p\n", vmalloc(64));
+ pr_info("vmalloc(64) = %p\n", vmalloc(64));
+ pr_info("vmalloc(64) = %p\n", vmalloc(64));
+ pr_info("vmalloc(64) = %p\n", vmalloc(64));
+
+ /*
+ * Add elements to a list. They should only appear as orphan
+ * after the module is removed.
+ */
+ for (i = 0; i < 10; i++) {
+ elem = kzalloc(sizeof(*elem), GFP_KERNEL);
+ pr_info("kzalloc(sizeof(*elem)) = %p\n", elem);
+ if (!elem)
+ return -ENOMEM;
+ INIT_LIST_HEAD(&elem->list);
+ list_add_tail(&elem->list, &test_list);
+ }
+
+ for_each_possible_cpu(i) {
+ per_cpu(kmemleak_test_pointer, i) = kmalloc(129, GFP_KERNEL);
+ pr_info("kmalloc(129) = %p\n",
+ per_cpu(kmemleak_test_pointer, i));
+ }
+
+ return 0;
+}
+module_init(kmemleak_test_init);
+
+static void __exit kmemleak_test_exit(void)
+{
+ struct test_node *elem, *tmp;
+
+ /*
+ * Remove the list elements without actually freeing the
+ * memory.
+ */
+ list_for_each_entry_safe(elem, tmp, &test_list, list)
+ list_del(&elem->list);
+}
+module_exit(kmemleak_test_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/samples/kobject/kobject-example.c b/samples/kobject/kobject-example.c
index 9e383fdbaa00..96678ed73216 100644
--- a/samples/kobject/kobject-example.c
+++ b/samples/kobject/kobject-example.c
@@ -28,7 +28,7 @@ static int bar;
static ssize_t foo_show(struct kobject *kobj, struct kobj_attribute *attr,
char *buf)
{
- return sprintf(buf, "%d\n", foo);
+ return sysfs_emit(buf, "%d\n", foo);
}
static ssize_t foo_store(struct kobject *kobj, struct kobj_attribute *attr,
@@ -60,7 +60,7 @@ static ssize_t b_show(struct kobject *kobj, struct kobj_attribute *attr,
var = baz;
else
var = bar;
- return sprintf(buf, "%d\n", var);
+ return sysfs_emit(buf, "%d\n", var);
}
static ssize_t b_store(struct kobject *kobj, struct kobj_attribute *attr,
diff --git a/samples/kobject/kset-example.c b/samples/kobject/kset-example.c
index c8010f126808..52f1acabd479 100644
--- a/samples/kobject/kset-example.c
+++ b/samples/kobject/kset-example.c
@@ -112,7 +112,7 @@ static void foo_release(struct kobject *kobj)
static ssize_t foo_show(struct foo_obj *foo_obj, struct foo_attribute *attr,
char *buf)
{
- return sprintf(buf, "%d\n", foo_obj->foo);
+ return sysfs_emit(buf, "%d\n", foo_obj->foo);
}
static ssize_t foo_store(struct foo_obj *foo_obj, struct foo_attribute *attr,
@@ -144,7 +144,7 @@ static ssize_t b_show(struct foo_obj *foo_obj, struct foo_attribute *attr,
var = foo_obj->baz;
else
var = foo_obj->bar;
- return sprintf(buf, "%d\n", var);
+ return sysfs_emit(buf, "%d\n", var);
}
static ssize_t b_store(struct foo_obj *foo_obj, struct foo_attribute *attr,
diff --git a/samples/kprobes/kprobe_example.c b/samples/kprobes/kprobe_example.c
index d693c23a85e8..fd346f58ddba 100644
--- a/samples/kprobes/kprobe_example.c
+++ b/samples/kprobes/kprobe_example.c
@@ -1,23 +1,23 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * NOTE: This example is works on x86 and powerpc.
* Here's a sample kernel module showing the use of kprobes to dump a
- * stack trace and selected registers when _do_fork() is called.
+ * stack trace and selected registers when kernel_clone() is called.
*
* For more information on theory of operation of kprobes, see
- * Documentation/kprobes.txt
+ * Documentation/trace/kprobes.rst
*
* You will see the trace data in /var/log/messages and on the console
- * whenever _do_fork() is invoked to create a new process.
+ * whenever kernel_clone() is invoked to create a new process.
*/
+#define pr_fmt(fmt) "%s: " fmt, __func__
+
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kprobes.h>
-#define MAX_SYMBOL_LEN 64
-static char symbol[MAX_SYMBOL_LEN] = "_do_fork";
-module_param_string(symbol, symbol, sizeof(symbol), 0644);
+static char symbol[KSYM_NAME_LEN] = "kernel_clone";
+module_param_string(symbol, symbol, KSYM_NAME_LEN, 0644);
/* For each probe you need to allocate a kprobe structure */
static struct kprobe kp = {
@@ -25,27 +25,34 @@ static struct kprobe kp = {
};
/* kprobe pre_handler: called just before the probed instruction is executed */
-static int handler_pre(struct kprobe *p, struct pt_regs *regs)
+static int __kprobes handler_pre(struct kprobe *p, struct pt_regs *regs)
{
#ifdef CONFIG_X86
- pr_info("<%s> pre_handler: p->addr = 0x%p, ip = %lx, flags = 0x%lx\n",
+ pr_info("<%s> p->addr = 0x%p, ip = %lx, flags = 0x%lx\n",
p->symbol_name, p->addr, regs->ip, regs->flags);
#endif
#ifdef CONFIG_PPC
- pr_info("<%s> pre_handler: p->addr = 0x%p, nip = 0x%lx, msr = 0x%lx\n",
+ pr_info("<%s> p->addr = 0x%p, nip = 0x%lx, msr = 0x%lx\n",
p->symbol_name, p->addr, regs->nip, regs->msr);
#endif
#ifdef CONFIG_MIPS
- pr_info("<%s> pre_handler: p->addr = 0x%p, epc = 0x%lx, status = 0x%lx\n",
+ pr_info("<%s> p->addr = 0x%p, epc = 0x%lx, status = 0x%lx\n",
p->symbol_name, p->addr, regs->cp0_epc, regs->cp0_status);
#endif
#ifdef CONFIG_ARM64
- pr_info("<%s> pre_handler: p->addr = 0x%p, pc = 0x%lx,"
- " pstate = 0x%lx\n",
+ pr_info("<%s> p->addr = 0x%p, pc = 0x%lx, pstate = 0x%lx\n",
p->symbol_name, p->addr, (long)regs->pc, (long)regs->pstate);
#endif
+#ifdef CONFIG_ARM
+ pr_info("<%s> p->addr = 0x%p, pc = 0x%lx, cpsr = 0x%lx\n",
+ p->symbol_name, p->addr, (long)regs->ARM_pc, (long)regs->ARM_cpsr);
+#endif
+#ifdef CONFIG_RISCV
+ pr_info("<%s> p->addr = 0x%p, pc = 0x%lx, status = 0x%lx\n",
+ p->symbol_name, p->addr, regs->epc, regs->status);
+#endif
#ifdef CONFIG_S390
- pr_info("<%s> pre_handler: p->addr, 0x%p, ip = 0x%lx, flags = 0x%lx\n",
+ pr_info("<%s> p->addr, 0x%p, ip = 0x%lx, flags = 0x%lx\n",
p->symbol_name, p->addr, regs->psw.addr, regs->flags);
#endif
@@ -54,49 +61,44 @@ static int handler_pre(struct kprobe *p, struct pt_regs *regs)
}
/* kprobe post_handler: called after the probed instruction is executed */
-static void handler_post(struct kprobe *p, struct pt_regs *regs,
+static void __kprobes handler_post(struct kprobe *p, struct pt_regs *regs,
unsigned long flags)
{
#ifdef CONFIG_X86
- pr_info("<%s> post_handler: p->addr = 0x%p, flags = 0x%lx\n",
+ pr_info("<%s> p->addr = 0x%p, flags = 0x%lx\n",
p->symbol_name, p->addr, regs->flags);
#endif
#ifdef CONFIG_PPC
- pr_info("<%s> post_handler: p->addr = 0x%p, msr = 0x%lx\n",
+ pr_info("<%s> p->addr = 0x%p, msr = 0x%lx\n",
p->symbol_name, p->addr, regs->msr);
#endif
#ifdef CONFIG_MIPS
- pr_info("<%s> post_handler: p->addr = 0x%p, status = 0x%lx\n",
+ pr_info("<%s> p->addr = 0x%p, status = 0x%lx\n",
p->symbol_name, p->addr, regs->cp0_status);
#endif
#ifdef CONFIG_ARM64
- pr_info("<%s> post_handler: p->addr = 0x%p, pstate = 0x%lx\n",
+ pr_info("<%s> p->addr = 0x%p, pstate = 0x%lx\n",
p->symbol_name, p->addr, (long)regs->pstate);
#endif
+#ifdef CONFIG_ARM
+ pr_info("<%s> p->addr = 0x%p, cpsr = 0x%lx\n",
+ p->symbol_name, p->addr, (long)regs->ARM_cpsr);
+#endif
+#ifdef CONFIG_RISCV
+ pr_info("<%s> p->addr = 0x%p, status = 0x%lx\n",
+ p->symbol_name, p->addr, regs->status);
+#endif
#ifdef CONFIG_S390
- pr_info("<%s> pre_handler: p->addr, 0x%p, flags = 0x%lx\n",
+ pr_info("<%s> p->addr, 0x%p, flags = 0x%lx\n",
p->symbol_name, p->addr, regs->flags);
#endif
}
-/*
- * fault_handler: this is called if an exception is generated for any
- * instruction within the pre- or post-handler, or when Kprobes
- * single-steps the probed instruction.
- */
-static int handler_fault(struct kprobe *p, struct pt_regs *regs, int trapnr)
-{
- pr_info("fault_handler: p->addr = 0x%p, trap #%dn", p->addr, trapnr);
- /* Return 0 because we don't handle the fault. */
- return 0;
-}
-
static int __init kprobe_init(void)
{
int ret;
kp.pre_handler = handler_pre;
kp.post_handler = handler_post;
- kp.fault_handler = handler_fault;
ret = register_kprobe(&kp);
if (ret < 0) {
diff --git a/samples/kprobes/kretprobe_example.c b/samples/kprobes/kretprobe_example.c
index 186315ca88b3..cbf16542d84e 100644
--- a/samples/kprobes/kretprobe_example.c
+++ b/samples/kprobes/kretprobe_example.c
@@ -8,10 +8,10 @@
*
* usage: insmod kretprobe_example.ko func=<func_name>
*
- * If no func_name is specified, _do_fork is instrumented
+ * If no func_name is specified, kernel_clone is instrumented
*
* For more information on theory of operation of kretprobes, see
- * Documentation/kprobes.txt
+ * Documentation/trace/kprobes.rst
*
* Build and insert the kernel module as done in the kprobe example.
* You will see the trace data in /var/log/messages and on the console
@@ -23,11 +23,10 @@
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/ktime.h>
-#include <linux/limits.h>
#include <linux/sched.h>
-static char func_name[NAME_MAX] = "_do_fork";
-module_param_string(func, func_name, NAME_MAX, S_IRUGO);
+static char func_name[KSYM_NAME_LEN] = "kernel_clone";
+module_param_string(func, func_name, KSYM_NAME_LEN, 0644);
MODULE_PARM_DESC(func, "Function to kretprobe; this module will report the"
" function's execution time");
@@ -48,6 +47,7 @@ static int entry_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
data->entry_stamp = ktime_get();
return 0;
}
+NOKPROBE_SYMBOL(entry_handler);
/*
* Return-probe handler: Log the return value and duration. Duration may turn
@@ -67,6 +67,7 @@ static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
func_name, retval, (long long)delta);
return 0;
}
+NOKPROBE_SYMBOL(ret_handler);
static struct kretprobe my_kretprobe = {
.handler = ret_handler,
@@ -84,7 +85,7 @@ static int __init kretprobe_init(void)
ret = register_kretprobe(&my_kretprobe);
if (ret < 0) {
pr_err("register_kretprobe failed, returned %d\n", ret);
- return -1;
+ return ret;
}
pr_info("Planted return probe at %s: %p\n",
my_kretprobe.kp.symbol_name, my_kretprobe.kp.addr);
diff --git a/samples/landlock/.gitignore b/samples/landlock/.gitignore
new file mode 100644
index 000000000000..f43668b2d318
--- /dev/null
+++ b/samples/landlock/.gitignore
@@ -0,0 +1 @@
+/sandboxer
diff --git a/samples/landlock/Makefile b/samples/landlock/Makefile
new file mode 100644
index 000000000000..5d601e51c2eb
--- /dev/null
+++ b/samples/landlock/Makefile
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: BSD-3-Clause
+
+userprogs-always-y := sandboxer
+
+userccflags += -I usr/include
+
+.PHONY: all clean
+
+all:
+ $(MAKE) -C ../.. samples/landlock/
+
+clean:
+ $(MAKE) -C ../.. M=samples/landlock/ clean
diff --git a/samples/landlock/sandboxer.c b/samples/landlock/sandboxer.c
new file mode 100644
index 000000000000..f29bb3c72230
--- /dev/null
+++ b/samples/landlock/sandboxer.c
@@ -0,0 +1,290 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/*
+ * Simple Landlock sandbox manager able to launch a process restricted by a
+ * user-defined filesystem access control policy.
+ *
+ * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2020 ANSSI
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/landlock.h>
+#include <linux/prctl.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#ifndef landlock_create_ruleset
+static inline int
+landlock_create_ruleset(const struct landlock_ruleset_attr *const attr,
+ const size_t size, const __u32 flags)
+{
+ return syscall(__NR_landlock_create_ruleset, attr, size, flags);
+}
+#endif
+
+#ifndef landlock_add_rule
+static inline int landlock_add_rule(const int ruleset_fd,
+ const enum landlock_rule_type rule_type,
+ const void *const rule_attr,
+ const __u32 flags)
+{
+ return syscall(__NR_landlock_add_rule, ruleset_fd, rule_type, rule_attr,
+ flags);
+}
+#endif
+
+#ifndef landlock_restrict_self
+static inline int landlock_restrict_self(const int ruleset_fd,
+ const __u32 flags)
+{
+ return syscall(__NR_landlock_restrict_self, ruleset_fd, flags);
+}
+#endif
+
+#define ENV_FS_RO_NAME "LL_FS_RO"
+#define ENV_FS_RW_NAME "LL_FS_RW"
+#define ENV_PATH_TOKEN ":"
+
+static int parse_path(char *env_path, const char ***const path_list)
+{
+ int i, num_paths = 0;
+
+ if (env_path) {
+ num_paths++;
+ for (i = 0; env_path[i]; i++) {
+ if (env_path[i] == ENV_PATH_TOKEN[0])
+ num_paths++;
+ }
+ }
+ *path_list = malloc(num_paths * sizeof(**path_list));
+ for (i = 0; i < num_paths; i++)
+ (*path_list)[i] = strsep(&env_path, ENV_PATH_TOKEN);
+
+ return num_paths;
+}
+
+/* clang-format off */
+
+#define ACCESS_FILE ( \
+ LANDLOCK_ACCESS_FS_EXECUTE | \
+ LANDLOCK_ACCESS_FS_WRITE_FILE | \
+ LANDLOCK_ACCESS_FS_READ_FILE)
+
+/* clang-format on */
+
+static int populate_ruleset(const char *const env_var, const int ruleset_fd,
+ const __u64 allowed_access)
+{
+ int num_paths, i, ret = 1;
+ char *env_path_name;
+ const char **path_list = NULL;
+ struct landlock_path_beneath_attr path_beneath = {
+ .parent_fd = -1,
+ };
+
+ env_path_name = getenv(env_var);
+ if (!env_path_name) {
+ /* Prevents users to forget a setting. */
+ fprintf(stderr, "Missing environment variable %s\n", env_var);
+ return 1;
+ }
+ env_path_name = strdup(env_path_name);
+ unsetenv(env_var);
+ num_paths = parse_path(env_path_name, &path_list);
+ if (num_paths == 1 && path_list[0][0] == '\0') {
+ /*
+ * Allows to not use all possible restrictions (e.g. use
+ * LL_FS_RO without LL_FS_RW).
+ */
+ ret = 0;
+ goto out_free_name;
+ }
+
+ for (i = 0; i < num_paths; i++) {
+ struct stat statbuf;
+
+ path_beneath.parent_fd = open(path_list[i], O_PATH | O_CLOEXEC);
+ if (path_beneath.parent_fd < 0) {
+ fprintf(stderr, "Failed to open \"%s\": %s\n",
+ path_list[i], strerror(errno));
+ goto out_free_name;
+ }
+ if (fstat(path_beneath.parent_fd, &statbuf)) {
+ close(path_beneath.parent_fd);
+ goto out_free_name;
+ }
+ path_beneath.allowed_access = allowed_access;
+ if (!S_ISDIR(statbuf.st_mode))
+ path_beneath.allowed_access &= ACCESS_FILE;
+ if (landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath, 0)) {
+ fprintf(stderr,
+ "Failed to update the ruleset with \"%s\": %s\n",
+ path_list[i], strerror(errno));
+ close(path_beneath.parent_fd);
+ goto out_free_name;
+ }
+ close(path_beneath.parent_fd);
+ }
+ ret = 0;
+
+out_free_name:
+ free(path_list);
+ free(env_path_name);
+ return ret;
+}
+
+/* clang-format off */
+
+#define ACCESS_FS_ROUGHLY_READ ( \
+ LANDLOCK_ACCESS_FS_EXECUTE | \
+ LANDLOCK_ACCESS_FS_READ_FILE | \
+ LANDLOCK_ACCESS_FS_READ_DIR)
+
+#define ACCESS_FS_ROUGHLY_WRITE ( \
+ LANDLOCK_ACCESS_FS_WRITE_FILE | \
+ LANDLOCK_ACCESS_FS_REMOVE_DIR | \
+ LANDLOCK_ACCESS_FS_REMOVE_FILE | \
+ LANDLOCK_ACCESS_FS_MAKE_CHAR | \
+ LANDLOCK_ACCESS_FS_MAKE_DIR | \
+ LANDLOCK_ACCESS_FS_MAKE_REG | \
+ LANDLOCK_ACCESS_FS_MAKE_SOCK | \
+ LANDLOCK_ACCESS_FS_MAKE_FIFO | \
+ LANDLOCK_ACCESS_FS_MAKE_BLOCK | \
+ LANDLOCK_ACCESS_FS_MAKE_SYM | \
+ LANDLOCK_ACCESS_FS_REFER)
+
+/* clang-format on */
+
+#define LANDLOCK_ABI_LAST 2
+
+int main(const int argc, char *const argv[], char *const *const envp)
+{
+ const char *cmd_path;
+ char *const *cmd_argv;
+ int ruleset_fd, abi;
+ __u64 access_fs_ro = ACCESS_FS_ROUGHLY_READ,
+ access_fs_rw = ACCESS_FS_ROUGHLY_READ | ACCESS_FS_ROUGHLY_WRITE;
+ struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = access_fs_rw,
+ };
+
+ if (argc < 2) {
+ fprintf(stderr,
+ "usage: %s=\"...\" %s=\"...\" %s <cmd> [args]...\n\n",
+ ENV_FS_RO_NAME, ENV_FS_RW_NAME, argv[0]);
+ fprintf(stderr,
+ "Launch a command in a restricted environment.\n\n");
+ fprintf(stderr, "Environment variables containing paths, "
+ "each separated by a colon:\n");
+ fprintf(stderr,
+ "* %s: list of paths allowed to be used in a read-only way.\n",
+ ENV_FS_RO_NAME);
+ fprintf(stderr,
+ "* %s: list of paths allowed to be used in a read-write way.\n",
+ ENV_FS_RW_NAME);
+ fprintf(stderr,
+ "\nexample:\n"
+ "%s=\"/bin:/lib:/usr:/proc:/etc:/dev/urandom\" "
+ "%s=\"/dev/null:/dev/full:/dev/zero:/dev/pts:/tmp\" "
+ "%s bash -i\n\n",
+ ENV_FS_RO_NAME, ENV_FS_RW_NAME, argv[0]);
+ fprintf(stderr,
+ "This sandboxer can use Landlock features "
+ "up to ABI version %d.\n",
+ LANDLOCK_ABI_LAST);
+ return 1;
+ }
+
+ abi = landlock_create_ruleset(NULL, 0, LANDLOCK_CREATE_RULESET_VERSION);
+ if (abi < 0) {
+ const int err = errno;
+
+ perror("Failed to check Landlock compatibility");
+ switch (err) {
+ case ENOSYS:
+ fprintf(stderr,
+ "Hint: Landlock is not supported by the current kernel. "
+ "To support it, build the kernel with "
+ "CONFIG_SECURITY_LANDLOCK=y and prepend "
+ "\"landlock,\" to the content of CONFIG_LSM.\n");
+ break;
+ case EOPNOTSUPP:
+ fprintf(stderr,
+ "Hint: Landlock is currently disabled. "
+ "It can be enabled in the kernel configuration by "
+ "prepending \"landlock,\" to the content of CONFIG_LSM, "
+ "or at boot time by setting the same content to the "
+ "\"lsm\" kernel parameter.\n");
+ break;
+ }
+ return 1;
+ }
+
+ /* Best-effort security. */
+ switch (abi) {
+ case 1:
+ /* Removes LANDLOCK_ACCESS_FS_REFER for ABI < 2 */
+ ruleset_attr.handled_access_fs &= ~LANDLOCK_ACCESS_FS_REFER;
+
+ fprintf(stderr,
+ "Hint: You should update the running kernel "
+ "to leverage Landlock features "
+ "provided by ABI version %d (instead of %d).\n",
+ LANDLOCK_ABI_LAST, abi);
+ __attribute__((fallthrough));
+ case LANDLOCK_ABI_LAST:
+ break;
+ default:
+ fprintf(stderr,
+ "Hint: You should update this sandboxer "
+ "to leverage Landlock features "
+ "provided by ABI version %d (instead of %d).\n",
+ abi, LANDLOCK_ABI_LAST);
+ }
+ access_fs_ro &= ruleset_attr.handled_access_fs;
+ access_fs_rw &= ruleset_attr.handled_access_fs;
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ if (ruleset_fd < 0) {
+ perror("Failed to create a ruleset");
+ return 1;
+ }
+ if (populate_ruleset(ENV_FS_RO_NAME, ruleset_fd, access_fs_ro)) {
+ goto err_close_ruleset;
+ }
+ if (populate_ruleset(ENV_FS_RW_NAME, ruleset_fd, access_fs_rw)) {
+ goto err_close_ruleset;
+ }
+ if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ perror("Failed to restrict privileges");
+ goto err_close_ruleset;
+ }
+ if (landlock_restrict_self(ruleset_fd, 0)) {
+ perror("Failed to enforce ruleset");
+ goto err_close_ruleset;
+ }
+ close(ruleset_fd);
+
+ cmd_path = argv[1];
+ cmd_argv = argv + 1;
+ execvpe(cmd_path, cmd_argv, envp);
+ fprintf(stderr, "Failed to execute \"%s\": %s\n", cmd_path,
+ strerror(errno));
+ fprintf(stderr, "Hint: access to the binary, the interpreter or "
+ "shared libraries may be denied.\n");
+ return 1;
+
+err_close_ruleset:
+ close(ruleset_fd);
+ return 1;
+}
diff --git a/samples/livepatch/livepatch-shadow-fix1.c b/samples/livepatch/livepatch-shadow-fix1.c
index 918ce17b43fd..6701641bf12d 100644
--- a/samples/livepatch/livepatch-shadow-fix1.c
+++ b/samples/livepatch/livepatch-shadow-fix1.c
@@ -109,9 +109,9 @@ static void livepatch_fix1_dummy_leak_dtor(void *obj, void *shadow_data)
void *d = obj;
int **shadow_leak = shadow_data;
- kfree(*shadow_leak);
pr_info("%s: dummy @ %p, prevented leak @ %p\n",
__func__, d, *shadow_leak);
+ kfree(*shadow_leak);
}
static void livepatch_fix1_dummy_free(struct dummy *d)
diff --git a/samples/livepatch/livepatch-shadow-fix2.c b/samples/livepatch/livepatch-shadow-fix2.c
index 29fe5cd42047..361046a4f10c 100644
--- a/samples/livepatch/livepatch-shadow-fix2.c
+++ b/samples/livepatch/livepatch-shadow-fix2.c
@@ -61,9 +61,9 @@ static void livepatch_fix2_dummy_leak_dtor(void *obj, void *shadow_data)
void *d = obj;
int **shadow_leak = shadow_data;
- kfree(*shadow_leak);
pr_info("%s: dummy @ %p, prevented leak @ %p\n",
__func__, d, *shadow_leak);
+ kfree(*shadow_leak);
}
static void livepatch_fix2_dummy_free(struct dummy *d)
diff --git a/samples/mei/.gitignore b/samples/mei/.gitignore
index f356b81ca1ec..fe894bcb6a62 100644
--- a/samples/mei/.gitignore
+++ b/samples/mei/.gitignore
@@ -1 +1,2 @@
-mei-amt-version
+# SPDX-License-Identifier: GPL-2.0-only
+/mei-amt-version
diff --git a/samples/mei/Makefile b/samples/mei/Makefile
index f5b9d02be2cd..c54b8a0ab04e 100644
--- a/samples/mei/Makefile
+++ b/samples/mei/Makefile
@@ -1,10 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
# Copyright (c) 2012-2019, Intel Corporation. All rights reserved.
+userprogs-always-y += mei-amt-version
-hostprogs := mei-amt-version
-
-HOSTCFLAGS_mei-amt-version.o += -I$(objtree)/usr/include
-
-always-y := $(hostprogs)
-
-all: mei-amt-version
+userccflags += -I usr/include
diff --git a/samples/mei/mei-amt-version.c b/samples/mei/mei-amt-version.c
index 32234481ad7d..867debd3b912 100644
--- a/samples/mei/mei-amt-version.c
+++ b/samples/mei/mei-amt-version.c
@@ -154,31 +154,52 @@ err:
static ssize_t mei_recv_msg(struct mei *me, unsigned char *buffer,
ssize_t len, unsigned long timeout)
{
+ struct timeval tv;
+ fd_set set;
ssize_t rc;
+ tv.tv_sec = timeout / 1000;
+ tv.tv_usec = (timeout % 1000) * 1000000;
+
mei_msg(me, "call read length = %zd\n", len);
+ FD_ZERO(&set);
+ FD_SET(me->fd, &set);
+ rc = select(me->fd + 1, &set, NULL, NULL, &tv);
+ if (rc > 0 && FD_ISSET(me->fd, &set)) {
+ mei_msg(me, "have reply\n");
+ } else if (rc == 0) {
+ rc = -1;
+ mei_err(me, "read failed on timeout\n");
+ goto out;
+ } else { /* rc < 0 */
+ rc = errno;
+ mei_err(me, "read failed on select with status %zd %s\n",
+ rc, strerror(errno));
+ goto out;
+ }
+
rc = read(me->fd, buffer, len);
if (rc < 0) {
mei_err(me, "read failed with status %zd %s\n",
rc, strerror(errno));
- mei_deinit(me);
- } else {
- mei_msg(me, "read succeeded with result %zd\n", rc);
+ goto out;
}
+
+ mei_msg(me, "read succeeded with result %zd\n", rc);
+
+out:
+ if (rc < 0)
+ mei_deinit(me);
+
return rc;
}
static ssize_t mei_send_msg(struct mei *me, const unsigned char *buffer,
ssize_t len, unsigned long timeout)
{
- struct timeval tv;
ssize_t written;
ssize_t rc;
- fd_set set;
-
- tv.tv_sec = timeout / 1000;
- tv.tv_usec = (timeout % 1000) * 1000000;
mei_msg(me, "call write length = %zd\n", len);
@@ -189,19 +210,7 @@ static ssize_t mei_send_msg(struct mei *me, const unsigned char *buffer,
written, strerror(errno));
goto out;
}
-
- FD_ZERO(&set);
- FD_SET(me->fd, &set);
- rc = select(me->fd + 1 , &set, NULL, NULL, &tv);
- if (rc > 0 && FD_ISSET(me->fd, &set)) {
- mei_msg(me, "write success\n");
- } else if (rc == 0) {
- mei_err(me, "write failed on timeout with status\n");
- goto out;
- } else { /* rc < 0 */
- mei_err(me, "write failed on select with status %zd\n", rc);
- goto out;
- }
+ mei_msg(me, "write success\n");
rc = written;
out:
@@ -267,7 +276,7 @@ struct amt_host_if_msg_header {
struct amt_host_if_resp_header {
struct amt_host_if_msg_header header;
uint32_t status;
- unsigned char data[0];
+ unsigned char data[];
} __attribute__((packed));
const uuid_le MEI_IAMTHIF = UUID_LE(0x12f80028, 0xb4b7, 0x4b2d, \
diff --git a/samples/mic/mpssd/.gitignore b/samples/mic/mpssd/.gitignore
deleted file mode 100644
index 8b7c72f07c92..000000000000
--- a/samples/mic/mpssd/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-mpssd
diff --git a/samples/mic/mpssd/Makefile b/samples/mic/mpssd/Makefile
deleted file mode 100644
index a7a6e0c70424..000000000000
--- a/samples/mic/mpssd/Makefile
+++ /dev/null
@@ -1,28 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-ifndef CROSS_COMPILE
-uname_M := $(shell uname -m 2>/dev/null || echo not)
-ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
-
-ifeq ($(ARCH),x86)
-
-PROGS := mpssd
-CC = $(CROSS_COMPILE)gcc
-CFLAGS := -I../../../usr/include -I../../../tools/include
-
-ifdef DEBUG
-CFLAGS += -DDEBUG=$(DEBUG)
-endif
-
-all: $(PROGS)
-mpssd: mpssd.c sysfs.c
- $(CC) $(CFLAGS) mpssd.c sysfs.c -o mpssd -lpthread
-
-install:
- install mpssd /usr/sbin/mpssd
- install micctrl /usr/sbin/micctrl
-
-clean:
- rm -fr $(PROGS)
-
-endif
-endif
diff --git a/samples/mic/mpssd/micctrl b/samples/mic/mpssd/micctrl
deleted file mode 100755
index 030a60b04046..000000000000
--- a/samples/mic/mpssd/micctrl
+++ /dev/null
@@ -1,162 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0-only
-# Intel MIC Platform Software Stack (MPSS)
-#
-# Copyright(c) 2013 Intel Corporation.
-#
-# Intel MIC User Space Tools.
-#
-# micctrl - Controls MIC boot/start/stop.
-#
-# chkconfig: 2345 95 05
-# description: start MPSS stack processing.
-#
-### BEGIN INIT INFO
-# Provides: micctrl
-### END INIT INFO
-
-# Source function library.
-. /etc/init.d/functions
-
-sysfs="/sys/class/mic"
-
-_status()
-{
- f=$sysfs/$1
- echo -e $1 state: "`cat $f/state`" shutdown_status: "`cat $f/shutdown_status`"
-}
-
-status()
-{
- if [ "`echo $1 | head -c3`" == "mic" ]; then
- _status $1
- return $?
- fi
- for f in $sysfs/*
- do
- _status `basename $f`
- RETVAL=$?
- [ $RETVAL -ne 0 ] && return $RETVAL
- done
- return 0
-}
-
-_reset()
-{
- f=$sysfs/$1
- echo reset > $f/state
-}
-
-reset()
-{
- if [ "`echo $1 | head -c3`" == "mic" ]; then
- _reset $1
- return $?
- fi
- for f in $sysfs/*
- do
- _reset `basename $f`
- RETVAL=$?
- [ $RETVAL -ne 0 ] && return $RETVAL
- done
- return 0
-}
-
-_boot()
-{
- f=$sysfs/$1
- echo "linux" > $f/bootmode
- echo "mic/uos.img" > $f/firmware
- echo "mic/$1.image" > $f/ramdisk
- echo "boot" > $f/state
-}
-
-boot()
-{
- if [ "`echo $1 | head -c3`" == "mic" ]; then
- _boot $1
- return $?
- fi
- for f in $sysfs/*
- do
- _boot `basename $f`
- RETVAL=$?
- [ $RETVAL -ne 0 ] && return $RETVAL
- done
- return 0
-}
-
-_shutdown()
-{
- f=$sysfs/$1
- echo shutdown > $f/state
-}
-
-shutdown()
-{
- if [ "`echo $1 | head -c3`" == "mic" ]; then
- _shutdown $1
- return $?
- fi
- for f in $sysfs/*
- do
- _shutdown `basename $f`
- RETVAL=$?
- [ $RETVAL -ne 0 ] && return $RETVAL
- done
- return 0
-}
-
-_wait()
-{
- f=$sysfs/$1
- while [ "`cat $f/state`" != "offline" -a "`cat $f/state`" != "online" ]
- do
- sleep 1
- echo -e "Waiting for $1 to go offline"
- done
-}
-
-wait()
-{
- if [ "`echo $1 | head -c3`" == "mic" ]; then
- _wait $1
- return $?
- fi
- # Wait for the cards to go offline
- for f in $sysfs/*
- do
- _wait `basename $f`
- RETVAL=$?
- [ $RETVAL -ne 0 ] && return $RETVAL
- done
- return 0
-}
-
-if [ ! -d "$sysfs" ]; then
- echo -e $"Module unloaded "
- exit 3
-fi
-
-case $1 in
- -s)
- status $2
- ;;
- -r)
- reset $2
- ;;
- -b)
- boot $2
- ;;
- -S)
- shutdown $2
- ;;
- -w)
- wait $2
- ;;
- *)
- echo $"Usage: $0 {-s (status) |-r (reset) |-b (boot) |-S (shutdown) |-w (wait)}"
- exit 2
-esac
-
-exit $?
diff --git a/samples/mic/mpssd/mpss b/samples/mic/mpssd/mpss
deleted file mode 100755
index 248ac7313c71..000000000000
--- a/samples/mic/mpssd/mpss
+++ /dev/null
@@ -1,189 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0-only
-# Intel MIC Platform Software Stack (MPSS)
-#
-# Copyright(c) 2013 Intel Corporation.
-#
-# Intel MIC User Space Tools.
-#
-# mpss Start mpssd.
-#
-# chkconfig: 2345 95 05
-# description: start MPSS stack processing.
-#
-### BEGIN INIT INFO
-# Provides: mpss
-# Required-Start:
-# Required-Stop:
-# Short-Description: MPSS stack control
-# Description: MPSS stack control
-### END INIT INFO
-
-# Source function library.
-. /etc/init.d/functions
-
-exec=/usr/sbin/mpssd
-sysfs="/sys/class/mic"
-mic_modules="mic_host mic_x100_dma scif vop"
-
-start()
-{
- [ -x $exec ] || exit 5
-
- if [ "`ps -e | awk '{print $4}' | grep mpssd | head -1`" = "mpssd" ]; then
- echo -e $"MPSSD already running! "
- success
- echo
- return 0
- fi
-
- echo -e $"Starting MPSS Stack"
- echo -e $"Loading MIC drivers:" $mic_modules
-
- modprobe -a $mic_modules
- RETVAL=$?
- if [ $RETVAL -ne 0 ]; then
- failure
- echo
- return $RETVAL
- fi
-
- # Start the daemon
- echo -n $"Starting MPSSD "
- $exec
- RETVAL=$?
- if [ $RETVAL -ne 0 ]; then
- failure
- echo
- return $RETVAL
- fi
- success
- echo
-
- sleep 5
-
- # Boot the cards
- micctrl -b
-
- # Wait till ping works
- for f in $sysfs/*
- do
- count=100
- ipaddr=`cat $f/cmdline`
- ipaddr=${ipaddr#*address,}
- ipaddr=`echo $ipaddr | cut -d, -f1 | cut -d\; -f1`
- while [ $count -ge 0 ]
- do
- echo -e "Pinging "`basename $f`" "
- ping -c 1 $ipaddr &> /dev/null
- RETVAL=$?
- if [ $RETVAL -eq 0 ]; then
- success
- break
- fi
- sleep 1
- count=`expr $count - 1`
- done
- [ $RETVAL -ne 0 ] && failure || success
- echo
- done
- return $RETVAL
-}
-
-stop()
-{
- echo -e $"Shutting down MPSS Stack: "
-
- # Bail out if module is unloaded
- if [ ! -d "$sysfs" ]; then
- echo -n $"Module unloaded "
- success
- echo
- return 0
- fi
-
- # Shut down the cards.
- micctrl -S
-
- # Wait for the cards to go offline
- for f in $sysfs/*
- do
- while [ "`cat $f/state`" != "ready" ]
- do
- sleep 1
- echo -e "Waiting for "`basename $f`" to become ready"
- done
- done
-
- # Display the status of the cards
- micctrl -s
-
- # Kill MPSSD now
- echo -n $"Killing MPSSD"
- killall -9 mpssd 2>/dev/null
- RETVAL=$?
- [ $RETVAL -ne 0 ] && failure || success
- echo
- return $RETVAL
-}
-
-restart()
-{
- stop
- sleep 5
- start
-}
-
-status()
-{
- micctrl -s
- if [ "`ps -e | awk '{print $4}' | grep mpssd | head -n 1`" = "mpssd" ]; then
- echo "mpssd is running"
- else
- echo "mpssd is stopped"
- fi
- return 0
-}
-
-unload()
-{
- if [ ! -d "$sysfs" ]; then
- echo -n $"No MIC_HOST Module: "
- success
- echo
- return
- fi
-
- stop
-
- sleep 5
- echo -n $"Removing MIC drivers:" $mic_modules
- modprobe -r $mic_modules
- RETVAL=$?
- [ $RETVAL -ne 0 ] && failure || success
- echo
- return $RETVAL
-}
-
-case $1 in
- start)
- start
- ;;
- stop)
- stop
- ;;
- restart)
- restart
- ;;
- status)
- status
- ;;
- unload)
- unload
- ;;
- *)
- echo $"Usage: $0 {start|stop|restart|status|unload}"
- exit 2
-esac
-
-exit $?
diff --git a/samples/mic/mpssd/mpssd.c b/samples/mic/mpssd/mpssd.c
deleted file mode 100644
index a11bf6c5b53b..000000000000
--- a/samples/mic/mpssd/mpssd.c
+++ /dev/null
@@ -1,1815 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2013 Intel Corporation.
- *
- * Intel MIC User Space Tools.
- */
-
-#define _GNU_SOURCE
-
-#include <stdlib.h>
-#include <fcntl.h>
-#include <getopt.h>
-#include <assert.h>
-#include <unistd.h>
-#include <stdbool.h>
-#include <signal.h>
-#include <poll.h>
-#include <features.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-#include <sys/socket.h>
-#include <linux/virtio_ring.h>
-#include <linux/virtio_net.h>
-#include <linux/virtio_console.h>
-#include <linux/virtio_blk.h>
-#include <linux/version.h>
-#include "mpssd.h"
-#include <linux/mic_ioctl.h>
-#include <linux/mic_common.h>
-#include <tools/endian.h>
-
-static void *init_mic(void *arg);
-
-static FILE *logfp;
-static struct mic_info mic_list;
-
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-
-#define min_t(type, x, y) ({ \
- type __min1 = (x); \
- type __min2 = (y); \
- __min1 < __min2 ? __min1 : __min2; })
-
-/* align addr on a size boundary - adjust address up/down if needed */
-#define _ALIGN_DOWN(addr, size) ((addr)&(~((size)-1)))
-#define _ALIGN_UP(addr, size) _ALIGN_DOWN(addr + size - 1, size)
-
-/* align addr on a size boundary - adjust address up if needed */
-#define _ALIGN(addr, size) _ALIGN_UP(addr, size)
-
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE)
-
-#define READ_ONCE(x) (*(volatile typeof(x) *)&(x))
-
-#define GSO_ENABLED 1
-#define MAX_GSO_SIZE (64 * 1024)
-#define ETH_H_LEN 14
-#define MAX_NET_PKT_SIZE (_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64))
-#define MIC_DEVICE_PAGE_END 0x1000
-
-#ifndef VIRTIO_NET_HDR_F_DATA_VALID
-#define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */
-#endif
-
-static struct {
- struct mic_device_desc dd;
- struct mic_vqconfig vqconfig[2];
- __u32 host_features, guest_acknowledgements;
- struct virtio_console_config cons_config;
-} virtcons_dev_page = {
- .dd = {
- .type = VIRTIO_ID_CONSOLE,
- .num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig),
- .feature_len = sizeof(virtcons_dev_page.host_features),
- .config_len = sizeof(virtcons_dev_page.cons_config),
- },
- .vqconfig[0] = {
- .num = htole16(MIC_VRING_ENTRIES),
- },
- .vqconfig[1] = {
- .num = htole16(MIC_VRING_ENTRIES),
- },
-};
-
-static struct {
- struct mic_device_desc dd;
- struct mic_vqconfig vqconfig[2];
- __u32 host_features, guest_acknowledgements;
- struct virtio_net_config net_config;
-} virtnet_dev_page = {
- .dd = {
- .type = VIRTIO_ID_NET,
- .num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig),
- .feature_len = sizeof(virtnet_dev_page.host_features),
- .config_len = sizeof(virtnet_dev_page.net_config),
- },
- .vqconfig[0] = {
- .num = htole16(MIC_VRING_ENTRIES),
- },
- .vqconfig[1] = {
- .num = htole16(MIC_VRING_ENTRIES),
- },
-#if GSO_ENABLED
- .host_features = htole32(
- 1 << VIRTIO_NET_F_CSUM |
- 1 << VIRTIO_NET_F_GSO |
- 1 << VIRTIO_NET_F_GUEST_TSO4 |
- 1 << VIRTIO_NET_F_GUEST_TSO6 |
- 1 << VIRTIO_NET_F_GUEST_ECN),
-#else
- .host_features = 0,
-#endif
-};
-
-static const char *mic_config_dir = "/etc/mpss";
-static const char *virtblk_backend = "VIRTBLK_BACKEND";
-static struct {
- struct mic_device_desc dd;
- struct mic_vqconfig vqconfig[1];
- __u32 host_features, guest_acknowledgements;
- struct virtio_blk_config blk_config;
-} virtblk_dev_page = {
- .dd = {
- .type = VIRTIO_ID_BLOCK,
- .num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig),
- .feature_len = sizeof(virtblk_dev_page.host_features),
- .config_len = sizeof(virtblk_dev_page.blk_config),
- },
- .vqconfig[0] = {
- .num = htole16(MIC_VRING_ENTRIES),
- },
- .host_features =
- htole32(1<<VIRTIO_BLK_F_SEG_MAX),
- .blk_config = {
- .seg_max = htole32(MIC_VRING_ENTRIES - 2),
- .capacity = htole64(0),
- }
-};
-
-static char *myname;
-
-static int
-tap_configure(struct mic_info *mic, char *dev)
-{
- pid_t pid;
- char *ifargv[7];
- char ipaddr[IFNAMSIZ];
- int ret = 0;
-
- pid = fork();
- if (pid == 0) {
- ifargv[0] = "ip";
- ifargv[1] = "link";
- ifargv[2] = "set";
- ifargv[3] = dev;
- ifargv[4] = "up";
- ifargv[5] = NULL;
- mpsslog("Configuring %s\n", dev);
- ret = execvp("ip", ifargv);
- if (ret < 0) {
- mpsslog("%s execvp failed errno %s\n",
- mic->name, strerror(errno));
- return ret;
- }
- }
- if (pid < 0) {
- mpsslog("%s fork failed errno %s\n",
- mic->name, strerror(errno));
- return ret;
- }
-
- ret = waitpid(pid, NULL, 0);
- if (ret < 0) {
- mpsslog("%s waitpid failed errno %s\n",
- mic->name, strerror(errno));
- return ret;
- }
-
- snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id + 1);
-
- pid = fork();
- if (pid == 0) {
- ifargv[0] = "ip";
- ifargv[1] = "addr";
- ifargv[2] = "add";
- ifargv[3] = ipaddr;
- ifargv[4] = "dev";
- ifargv[5] = dev;
- ifargv[6] = NULL;
- mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr);
- ret = execvp("ip", ifargv);
- if (ret < 0) {
- mpsslog("%s execvp failed errno %s\n",
- mic->name, strerror(errno));
- return ret;
- }
- }
- if (pid < 0) {
- mpsslog("%s fork failed errno %s\n",
- mic->name, strerror(errno));
- return ret;
- }
-
- ret = waitpid(pid, NULL, 0);
- if (ret < 0) {
- mpsslog("%s waitpid failed errno %s\n",
- mic->name, strerror(errno));
- return ret;
- }
- mpsslog("MIC name %s %s %d DONE!\n",
- mic->name, __func__, __LINE__);
- return 0;
-}
-
-static int tun_alloc(struct mic_info *mic, char *dev)
-{
- struct ifreq ifr;
- int fd, err;
-#if GSO_ENABLED
- unsigned offload;
-#endif
- fd = open("/dev/net/tun", O_RDWR);
- if (fd < 0) {
- mpsslog("Could not open /dev/net/tun %s\n", strerror(errno));
- goto done;
- }
-
- memset(&ifr, 0, sizeof(ifr));
-
- ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
- if (*dev)
- strncpy(ifr.ifr_name, dev, IFNAMSIZ);
-
- err = ioctl(fd, TUNSETIFF, (void *)&ifr);
- if (err < 0) {
- mpsslog("%s %s %d TUNSETIFF failed %s\n",
- mic->name, __func__, __LINE__, strerror(errno));
- close(fd);
- return err;
- }
-#if GSO_ENABLED
- offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_TSO_ECN;
-
- err = ioctl(fd, TUNSETOFFLOAD, offload);
- if (err < 0) {
- mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n",
- mic->name, __func__, __LINE__, strerror(errno));
- close(fd);
- return err;
- }
-#endif
- strcpy(dev, ifr.ifr_name);
- mpsslog("Created TAP %s\n", dev);
-done:
- return fd;
-}
-
-#define NET_FD_VIRTIO_NET 0
-#define NET_FD_TUN 1
-#define MAX_NET_FD 2
-
-static void set_dp(struct mic_info *mic, int type, void *dp)
-{
- switch (type) {
- case VIRTIO_ID_CONSOLE:
- mic->mic_console.console_dp = dp;
- return;
- case VIRTIO_ID_NET:
- mic->mic_net.net_dp = dp;
- return;
- case VIRTIO_ID_BLOCK:
- mic->mic_virtblk.block_dp = dp;
- return;
- }
- mpsslog("%s %s %d not found\n", mic->name, __func__, type);
- assert(0);
-}
-
-static void *get_dp(struct mic_info *mic, int type)
-{
- switch (type) {
- case VIRTIO_ID_CONSOLE:
- return mic->mic_console.console_dp;
- case VIRTIO_ID_NET:
- return mic->mic_net.net_dp;
- case VIRTIO_ID_BLOCK:
- return mic->mic_virtblk.block_dp;
- }
- mpsslog("%s %s %d not found\n", mic->name, __func__, type);
- assert(0);
- return NULL;
-}
-
-static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type)
-{
- struct mic_device_desc *d;
- int i;
- void *dp = get_dp(mic, type);
-
- for (i = sizeof(struct mic_bootparam); i < PAGE_SIZE;
- i += mic_total_desc_size(d)) {
- d = dp + i;
-
- /* End of list */
- if (d->type == 0)
- break;
-
- if (d->type == -1)
- continue;
-
- mpsslog("%s %s d-> type %d d %p\n",
- mic->name, __func__, d->type, d);
-
- if (d->type == (__u8)type)
- return d;
- }
- mpsslog("%s %s %d not found\n", mic->name, __func__, type);
- return NULL;
-}
-
-/* See comments in vhost.c for explanation of next_desc() */
-static unsigned next_desc(struct vring_desc *desc)
-{
- unsigned int next;
-
- if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT))
- return -1U;
- next = le16toh(desc->next);
- return next;
-}
-
-/* Sum up all the IOVEC length */
-static ssize_t
-sum_iovec_len(struct mic_copy_desc *copy)
-{
- ssize_t sum = 0;
- unsigned int i;
-
- for (i = 0; i < copy->iovcnt; i++)
- sum += copy->iov[i].iov_len;
- return sum;
-}
-
-static inline void verify_out_len(struct mic_info *mic,
- struct mic_copy_desc *copy)
-{
- if (copy->out_len != sum_iovec_len(copy)) {
- mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%zx\n",
- mic->name, __func__, __LINE__,
- copy->out_len, sum_iovec_len(copy));
- assert(copy->out_len == sum_iovec_len(copy));
- }
-}
-
-/* Display an iovec */
-static void
-disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy,
- const char *s, int line)
-{
- unsigned int i;
-
- for (i = 0; i < copy->iovcnt; i++)
- mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%zx\n",
- mic->name, s, line, i,
- copy->iov[i].iov_base, copy->iov[i].iov_len);
-}
-
-static inline __u16 read_avail_idx(struct mic_vring *vr)
-{
- return READ_ONCE(vr->info->avail_idx);
-}
-
-static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr,
- struct mic_copy_desc *copy, ssize_t len)
-{
- copy->vr_idx = tx ? 0 : 1;
- copy->update_used = true;
- if (type == VIRTIO_ID_NET)
- copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr);
- else
- copy->iov[0].iov_len = len;
-}
-
-/* Central API which triggers the copies */
-static int
-mic_virtio_copy(struct mic_info *mic, int fd,
- struct mic_vring *vr, struct mic_copy_desc *copy)
-{
- int ret;
-
- ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy);
- if (ret) {
- mpsslog("%s %s %d errno %s ret %d\n",
- mic->name, __func__, __LINE__,
- strerror(errno), ret);
- }
- return ret;
-}
-
-static inline unsigned _vring_size(unsigned int num, unsigned long align)
-{
- return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (3 + num)
- + align - 1) & ~(align - 1))
- + sizeof(__u16) * 3 + sizeof(struct vring_used_elem) * num;
-}
-
-/*
- * This initialization routine requires at least one
- * vring i.e. vr0. vr1 is optional.
- */
-static void *
-init_vr(struct mic_info *mic, int fd, int type,
- struct mic_vring *vr0, struct mic_vring *vr1, int num_vq)
-{
- int vr_size;
- char *va;
-
- vr_size = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES,
- MIC_VIRTIO_RING_ALIGN) +
- sizeof(struct _mic_vring_info));
- va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq,
- PROT_READ, MAP_SHARED, fd, 0);
- if (MAP_FAILED == va) {
- mpsslog("%s %s %d mmap failed errno %s\n",
- mic->name, __func__, __LINE__,
- strerror(errno));
- goto done;
- }
- set_dp(mic, type, va);
- vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END];
- vr0->info = vr0->va +
- _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN);
- vring_init(&vr0->vr,
- MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN);
- mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ",
- __func__, mic->name, vr0->va, vr0->info, vr_size,
- _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
- mpsslog("magic 0x%x expected 0x%x\n",
- le32toh(vr0->info->magic), MIC_MAGIC + type);
- assert(le32toh(vr0->info->magic) == MIC_MAGIC + type);
- if (vr1) {
- vr1->va = (struct mic_vring *)
- &va[MIC_DEVICE_PAGE_END + vr_size];
- vr1->info = vr1->va + _vring_size(MIC_VRING_ENTRIES,
- MIC_VIRTIO_RING_ALIGN);
- vring_init(&vr1->vr,
- MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN);
- mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ",
- __func__, mic->name, vr1->va, vr1->info, vr_size,
- _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
- mpsslog("magic 0x%x expected 0x%x\n",
- le32toh(vr1->info->magic), MIC_MAGIC + type + 1);
- assert(le32toh(vr1->info->magic) == MIC_MAGIC + type + 1);
- }
-done:
- return va;
-}
-
-static int
-wait_for_card_driver(struct mic_info *mic, int fd, int type)
-{
- struct pollfd pollfd;
- int err;
- struct mic_device_desc *desc = get_device_desc(mic, type);
- __u8 prev_status;
-
- if (!desc)
- return -ENODEV;
- prev_status = desc->status;
- pollfd.fd = fd;
- mpsslog("%s %s Waiting .... desc-> type %d status 0x%x\n",
- mic->name, __func__, type, desc->status);
-
- while (1) {
- pollfd.events = POLLIN;
- pollfd.revents = 0;
- err = poll(&pollfd, 1, -1);
- if (err < 0) {
- mpsslog("%s %s poll failed %s\n",
- mic->name, __func__, strerror(errno));
- continue;
- }
-
- if (pollfd.revents) {
- if (desc->status != prev_status) {
- mpsslog("%s %s Waiting... desc-> type %d "
- "status 0x%x\n",
- mic->name, __func__, type,
- desc->status);
- prev_status = desc->status;
- }
- if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
- mpsslog("%s %s poll.revents %d\n",
- mic->name, __func__, pollfd.revents);
- mpsslog("%s %s desc-> type %d status 0x%x\n",
- mic->name, __func__, type,
- desc->status);
- break;
- }
- }
- }
- return 0;
-}
-
-/* Spin till we have some descriptors */
-static void
-spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr)
-{
- __u16 avail_idx = read_avail_idx(vr);
-
- while (avail_idx == le16toh(READ_ONCE(vr->vr.avail->idx))) {
-#ifdef DEBUG
- mpsslog("%s %s waiting for desc avail %d info_avail %d\n",
- mic->name, __func__,
- le16toh(vr->vr.avail->idx), vr->info->avail_idx);
-#endif
- sched_yield();
- }
-}
-
-static void *
-virtio_net(void *arg)
-{
- static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)];
- static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __attribute__ ((aligned(64)));
- struct iovec vnet_iov[2][2] = {
- { { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) },
- { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } },
- { { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) },
- { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } },
- };
- struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1];
- struct mic_info *mic = (struct mic_info *)arg;
- char if_name[IFNAMSIZ];
- struct pollfd net_poll[MAX_NET_FD];
- struct mic_vring tx_vr, rx_vr;
- struct mic_copy_desc copy;
- struct mic_device_desc *desc;
- int err;
-
- snprintf(if_name, IFNAMSIZ, "mic%d", mic->id);
- mic->mic_net.tap_fd = tun_alloc(mic, if_name);
- if (mic->mic_net.tap_fd < 0)
- goto done;
-
- if (tap_configure(mic, if_name))
- goto done;
- mpsslog("MIC name %s id %d\n", mic->name, mic->id);
-
- net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd;
- net_poll[NET_FD_VIRTIO_NET].events = POLLIN;
- net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd;
- net_poll[NET_FD_TUN].events = POLLIN;
-
- if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd,
- VIRTIO_ID_NET, &tx_vr, &rx_vr,
- virtnet_dev_page.dd.num_vq)) {
- mpsslog("%s init_vr failed %s\n",
- mic->name, strerror(errno));
- goto done;
- }
-
- copy.iovcnt = 2;
- desc = get_device_desc(mic, VIRTIO_ID_NET);
-
- while (1) {
- ssize_t len;
-
- net_poll[NET_FD_VIRTIO_NET].revents = 0;
- net_poll[NET_FD_TUN].revents = 0;
-
- /* Start polling for data from tap and virtio net */
- err = poll(net_poll, 2, -1);
- if (err < 0) {
- mpsslog("%s poll failed %s\n",
- __func__, strerror(errno));
- continue;
- }
- if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
- err = wait_for_card_driver(mic,
- mic->mic_net.virtio_net_fd,
- VIRTIO_ID_NET);
- if (err) {
- mpsslog("%s %s %d Exiting...\n",
- mic->name, __func__, __LINE__);
- break;
- }
- }
- /*
- * Check if there is data to be read from TUN and write to
- * virtio net fd if there is.
- */
- if (net_poll[NET_FD_TUN].revents & POLLIN) {
- copy.iov = iov0;
- len = readv(net_poll[NET_FD_TUN].fd,
- copy.iov, copy.iovcnt);
- if (len > 0) {
- struct virtio_net_hdr *hdr
- = (struct virtio_net_hdr *)vnet_hdr[0];
-
- /* Disable checksums on the card since we are on
- a reliable PCIe link */
- hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
-#ifdef DEBUG
- mpsslog("%s %s %d hdr->flags 0x%x ", mic->name,
- __func__, __LINE__, hdr->flags);
- mpsslog("copy.out_len %d hdr->gso_type 0x%x\n",
- copy.out_len, hdr->gso_type);
-#endif
-#ifdef DEBUG
- disp_iovec(mic, copy, __func__, __LINE__);
- mpsslog("%s %s %d read from tap 0x%lx\n",
- mic->name, __func__, __LINE__,
- len);
-#endif
- spin_for_descriptors(mic, &tx_vr);
- txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, &copy,
- len);
-
- err = mic_virtio_copy(mic,
- mic->mic_net.virtio_net_fd, &tx_vr,
- &copy);
- if (err < 0) {
- mpsslog("%s %s %d mic_virtio_copy %s\n",
- mic->name, __func__, __LINE__,
- strerror(errno));
- }
- if (!err)
- verify_out_len(mic, &copy);
-#ifdef DEBUG
- disp_iovec(mic, copy, __func__, __LINE__);
- mpsslog("%s %s %d wrote to net 0x%lx\n",
- mic->name, __func__, __LINE__,
- sum_iovec_len(&copy));
-#endif
- /* Reinitialize IOV for next run */
- iov0[1].iov_len = MAX_NET_PKT_SIZE;
- } else if (len < 0) {
- disp_iovec(mic, &copy, __func__, __LINE__);
- mpsslog("%s %s %d read failed %s ", mic->name,
- __func__, __LINE__, strerror(errno));
- mpsslog("cnt %d sum %zd\n",
- copy.iovcnt, sum_iovec_len(&copy));
- }
- }
-
- /*
- * Check if there is data to be read from virtio net and
- * write to TUN if there is.
- */
- if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) {
- while (rx_vr.info->avail_idx !=
- le16toh(rx_vr.vr.avail->idx)) {
- copy.iov = iov1;
- txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, &copy,
- MAX_NET_PKT_SIZE
- + sizeof(struct virtio_net_hdr));
-
- err = mic_virtio_copy(mic,
- mic->mic_net.virtio_net_fd, &rx_vr,
- &copy);
- if (!err) {
-#ifdef DEBUG
- struct virtio_net_hdr *hdr
- = (struct virtio_net_hdr *)
- vnet_hdr[1];
-
- mpsslog("%s %s %d hdr->flags 0x%x, ",
- mic->name, __func__, __LINE__,
- hdr->flags);
- mpsslog("out_len %d gso_type 0x%x\n",
- copy.out_len,
- hdr->gso_type);
-#endif
- /* Set the correct output iov_len */
- iov1[1].iov_len = copy.out_len -
- sizeof(struct virtio_net_hdr);
- verify_out_len(mic, &copy);
-#ifdef DEBUG
- disp_iovec(mic, copy, __func__,
- __LINE__);
- mpsslog("%s %s %d ",
- mic->name, __func__, __LINE__);
- mpsslog("read from net 0x%lx\n",
- sum_iovec_len(copy));
-#endif
- len = writev(net_poll[NET_FD_TUN].fd,
- copy.iov, copy.iovcnt);
- if (len != sum_iovec_len(&copy)) {
- mpsslog("Tun write failed %s ",
- strerror(errno));
- mpsslog("len 0x%zx ", len);
- mpsslog("read_len 0x%zx\n",
- sum_iovec_len(&copy));
- } else {
-#ifdef DEBUG
- disp_iovec(mic, &copy, __func__,
- __LINE__);
- mpsslog("%s %s %d ",
- mic->name, __func__,
- __LINE__);
- mpsslog("wrote to tap 0x%lx\n",
- len);
-#endif
- }
- } else {
- mpsslog("%s %s %d mic_virtio_copy %s\n",
- mic->name, __func__, __LINE__,
- strerror(errno));
- break;
- }
- }
- }
- if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
- mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
- }
-done:
- pthread_exit(NULL);
-}
-
-/* virtio_console */
-#define VIRTIO_CONSOLE_FD 0
-#define MONITOR_FD (VIRTIO_CONSOLE_FD + 1)
-#define MAX_CONSOLE_FD (MONITOR_FD + 1) /* must be the last one + 1 */
-#define MAX_BUFFER_SIZE PAGE_SIZE
-
-static void *
-virtio_console(void *arg)
-{
- static __u8 vcons_buf[2][PAGE_SIZE];
- struct iovec vcons_iov[2] = {
- { .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) },
- { .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) },
- };
- struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1];
- struct mic_info *mic = (struct mic_info *)arg;
- int err;
- struct pollfd console_poll[MAX_CONSOLE_FD];
- int pty_fd;
- char *pts_name;
- ssize_t len;
- struct mic_vring tx_vr, rx_vr;
- struct mic_copy_desc copy;
- struct mic_device_desc *desc;
-
- pty_fd = posix_openpt(O_RDWR);
- if (pty_fd < 0) {
- mpsslog("can't open a pseudoterminal master device: %s\n",
- strerror(errno));
- goto _return;
- }
- pts_name = ptsname(pty_fd);
- if (pts_name == NULL) {
- mpsslog("can't get pts name\n");
- goto _close_pty;
- }
- printf("%s console message goes to %s\n", mic->name, pts_name);
- mpsslog("%s console message goes to %s\n", mic->name, pts_name);
- err = grantpt(pty_fd);
- if (err < 0) {
- mpsslog("can't grant access: %s %s\n",
- pts_name, strerror(errno));
- goto _close_pty;
- }
- err = unlockpt(pty_fd);
- if (err < 0) {
- mpsslog("can't unlock a pseudoterminal: %s %s\n",
- pts_name, strerror(errno));
- goto _close_pty;
- }
- console_poll[MONITOR_FD].fd = pty_fd;
- console_poll[MONITOR_FD].events = POLLIN;
-
- console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd;
- console_poll[VIRTIO_CONSOLE_FD].events = POLLIN;
-
- if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd,
- VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr,
- virtcons_dev_page.dd.num_vq)) {
- mpsslog("%s init_vr failed %s\n",
- mic->name, strerror(errno));
- goto _close_pty;
- }
-
- copy.iovcnt = 1;
- desc = get_device_desc(mic, VIRTIO_ID_CONSOLE);
-
- for (;;) {
- console_poll[MONITOR_FD].revents = 0;
- console_poll[VIRTIO_CONSOLE_FD].revents = 0;
- err = poll(console_poll, MAX_CONSOLE_FD, -1);
- if (err < 0) {
- mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__,
- strerror(errno));
- continue;
- }
- if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
- err = wait_for_card_driver(mic,
- mic->mic_console.virtio_console_fd,
- VIRTIO_ID_CONSOLE);
- if (err) {
- mpsslog("%s %s %d Exiting...\n",
- mic->name, __func__, __LINE__);
- break;
- }
- }
-
- if (console_poll[MONITOR_FD].revents & POLLIN) {
- copy.iov = iov0;
- len = readv(pty_fd, copy.iov, copy.iovcnt);
- if (len > 0) {
-#ifdef DEBUG
- disp_iovec(mic, copy, __func__, __LINE__);
- mpsslog("%s %s %d read from tap 0x%lx\n",
- mic->name, __func__, __LINE__,
- len);
-#endif
- spin_for_descriptors(mic, &tx_vr);
- txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr,
- &copy, len);
-
- err = mic_virtio_copy(mic,
- mic->mic_console.virtio_console_fd,
- &tx_vr, &copy);
- if (err < 0) {
- mpsslog("%s %s %d mic_virtio_copy %s\n",
- mic->name, __func__, __LINE__,
- strerror(errno));
- }
- if (!err)
- verify_out_len(mic, &copy);
-#ifdef DEBUG
- disp_iovec(mic, copy, __func__, __LINE__);
- mpsslog("%s %s %d wrote to net 0x%lx\n",
- mic->name, __func__, __LINE__,
- sum_iovec_len(copy));
-#endif
- /* Reinitialize IOV for next run */
- iov0->iov_len = PAGE_SIZE;
- } else if (len < 0) {
- disp_iovec(mic, &copy, __func__, __LINE__);
- mpsslog("%s %s %d read failed %s ",
- mic->name, __func__, __LINE__,
- strerror(errno));
- mpsslog("cnt %d sum %zd\n",
- copy.iovcnt, sum_iovec_len(&copy));
- }
- }
-
- if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) {
- while (rx_vr.info->avail_idx !=
- le16toh(rx_vr.vr.avail->idx)) {
- copy.iov = iov1;
- txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr,
- &copy, PAGE_SIZE);
-
- err = mic_virtio_copy(mic,
- mic->mic_console.virtio_console_fd,
- &rx_vr, &copy);
- if (!err) {
- /* Set the correct output iov_len */
- iov1->iov_len = copy.out_len;
- verify_out_len(mic, &copy);
-#ifdef DEBUG
- disp_iovec(mic, copy, __func__,
- __LINE__);
- mpsslog("%s %s %d ",
- mic->name, __func__, __LINE__);
- mpsslog("read from net 0x%lx\n",
- sum_iovec_len(copy));
-#endif
- len = writev(pty_fd,
- copy.iov, copy.iovcnt);
- if (len != sum_iovec_len(&copy)) {
- mpsslog("Tun write failed %s ",
- strerror(errno));
- mpsslog("len 0x%zx ", len);
- mpsslog("read_len 0x%zx\n",
- sum_iovec_len(&copy));
- } else {
-#ifdef DEBUG
- disp_iovec(mic, copy, __func__,
- __LINE__);
- mpsslog("%s %s %d ",
- mic->name, __func__,
- __LINE__);
- mpsslog("wrote to tap 0x%lx\n",
- len);
-#endif
- }
- } else {
- mpsslog("%s %s %d mic_virtio_copy %s\n",
- mic->name, __func__, __LINE__,
- strerror(errno));
- break;
- }
- }
- }
- if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
- mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
- }
-_close_pty:
- close(pty_fd);
-_return:
- pthread_exit(NULL);
-}
-
-static void
-add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd)
-{
- char path[PATH_MAX];
- int fd, err;
-
- snprintf(path, PATH_MAX, "/dev/vop_virtio%d", mic->id);
- fd = open(path, O_RDWR);
- if (fd < 0) {
- mpsslog("Could not open %s %s\n", path, strerror(errno));
- return;
- }
-
- err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd);
- if (err < 0) {
- mpsslog("Could not add %d %s\n", dd->type, strerror(errno));
- close(fd);
- return;
- }
- switch (dd->type) {
- case VIRTIO_ID_NET:
- mic->mic_net.virtio_net_fd = fd;
- mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name);
- break;
- case VIRTIO_ID_CONSOLE:
- mic->mic_console.virtio_console_fd = fd;
- mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name);
- break;
- case VIRTIO_ID_BLOCK:
- mic->mic_virtblk.virtio_block_fd = fd;
- mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name);
- break;
- }
-}
-
-static bool
-set_backend_file(struct mic_info *mic)
-{
- FILE *config;
- char buff[PATH_MAX], *line, *evv, *p;
-
- snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id);
- config = fopen(buff, "r");
- if (config == NULL)
- return false;
- do { /* look for "virtblk_backend=XXXX" */
- line = fgets(buff, PATH_MAX, config);
- if (line == NULL)
- break;
- if (*line == '#')
- continue;
- p = strchr(line, '\n');
- if (p)
- *p = '\0';
- } while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0);
- fclose(config);
- if (line == NULL)
- return false;
- evv = strchr(line, '=');
- if (evv == NULL)
- return false;
- mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1);
- if (mic->mic_virtblk.backend_file == NULL) {
- mpsslog("%s %d can't allocate memory\n", mic->name, mic->id);
- return false;
- }
- strcpy(mic->mic_virtblk.backend_file, evv + 1);
- return true;
-}
-
-#define SECTOR_SIZE 512
-static bool
-set_backend_size(struct mic_info *mic)
-{
- mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0,
- SEEK_END);
- if (mic->mic_virtblk.backend_size < 0) {
- mpsslog("%s: can't seek: %s\n",
- mic->name, mic->mic_virtblk.backend_file);
- return false;
- }
- virtblk_dev_page.blk_config.capacity =
- mic->mic_virtblk.backend_size / SECTOR_SIZE;
- if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0)
- virtblk_dev_page.blk_config.capacity++;
-
- virtblk_dev_page.blk_config.capacity =
- htole64(virtblk_dev_page.blk_config.capacity);
-
- return true;
-}
-
-static bool
-open_backend(struct mic_info *mic)
-{
- if (!set_backend_file(mic))
- goto _error_exit;
- mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR);
- if (mic->mic_virtblk.backend < 0) {
- mpsslog("%s: can't open: %s\n", mic->name,
- mic->mic_virtblk.backend_file);
- goto _error_free;
- }
- if (!set_backend_size(mic))
- goto _error_close;
- mic->mic_virtblk.backend_addr = mmap(NULL,
- mic->mic_virtblk.backend_size,
- PROT_READ|PROT_WRITE, MAP_SHARED,
- mic->mic_virtblk.backend, 0L);
- if (mic->mic_virtblk.backend_addr == MAP_FAILED) {
- mpsslog("%s: can't map: %s %s\n",
- mic->name, mic->mic_virtblk.backend_file,
- strerror(errno));
- goto _error_close;
- }
- return true;
-
- _error_close:
- close(mic->mic_virtblk.backend);
- _error_free:
- free(mic->mic_virtblk.backend_file);
- _error_exit:
- return false;
-}
-
-static void
-close_backend(struct mic_info *mic)
-{
- munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size);
- close(mic->mic_virtblk.backend);
- free(mic->mic_virtblk.backend_file);
-}
-
-static bool
-start_virtblk(struct mic_info *mic, struct mic_vring *vring)
-{
- if (((unsigned long)&virtblk_dev_page.blk_config % 8) != 0) {
- mpsslog("%s: blk_config is not 8 byte aligned.\n",
- mic->name);
- return false;
- }
- add_virtio_device(mic, &virtblk_dev_page.dd);
- if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd,
- VIRTIO_ID_BLOCK, vring, NULL,
- virtblk_dev_page.dd.num_vq)) {
- mpsslog("%s init_vr failed %s\n",
- mic->name, strerror(errno));
- return false;
- }
- return true;
-}
-
-static void
-stop_virtblk(struct mic_info *mic)
-{
- int vr_size, ret;
-
- vr_size = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES,
- MIC_VIRTIO_RING_ALIGN) +
- sizeof(struct _mic_vring_info));
- ret = munmap(mic->mic_virtblk.block_dp,
- MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq);
- if (ret < 0)
- mpsslog("%s munmap errno %d\n", mic->name, errno);
- close(mic->mic_virtblk.virtio_block_fd);
-}
-
-static __u8
-header_error_check(struct vring_desc *desc)
-{
- if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) {
- mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n",
- __func__, __LINE__);
- return -EIO;
- }
- if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) {
- mpsslog("%s() %d: alone\n",
- __func__, __LINE__);
- return -EIO;
- }
- if (le16toh(desc->flags) & VRING_DESC_F_WRITE) {
- mpsslog("%s() %d: not read\n",
- __func__, __LINE__);
- return -EIO;
- }
- return 0;
-}
-
-static int
-read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx)
-{
- struct iovec iovec;
- struct mic_copy_desc copy;
-
- iovec.iov_len = sizeof(*hdr);
- iovec.iov_base = hdr;
- copy.iov = &iovec;
- copy.iovcnt = 1;
- copy.vr_idx = 0; /* only one vring on virtio_block */
- copy.update_used = false; /* do not update used index */
- return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
-}
-
-static int
-transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt)
-{
- struct mic_copy_desc copy;
-
- copy.iov = iovec;
- copy.iovcnt = iovcnt;
- copy.vr_idx = 0; /* only one vring on virtio_block */
- copy.update_used = false; /* do not update used index */
- return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
-}
-
-static __u8
-status_error_check(struct vring_desc *desc)
-{
- if (le32toh(desc->len) != sizeof(__u8)) {
- mpsslog("%s() %d: length is not sizeof(status)\n",
- __func__, __LINE__);
- return -EIO;
- }
- return 0;
-}
-
-static int
-write_status(int fd, __u8 *status)
-{
- struct iovec iovec;
- struct mic_copy_desc copy;
-
- iovec.iov_base = status;
- iovec.iov_len = sizeof(*status);
- copy.iov = &iovec;
- copy.iovcnt = 1;
- copy.vr_idx = 0; /* only one vring on virtio_block */
- copy.update_used = true; /* Update used index */
- return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
-}
-
-#ifndef VIRTIO_BLK_T_GET_ID
-#define VIRTIO_BLK_T_GET_ID 8
-#endif
-
-static void *
-virtio_block(void *arg)
-{
- struct mic_info *mic = (struct mic_info *)arg;
- int ret;
- struct pollfd block_poll;
- struct mic_vring vring;
- __u16 avail_idx;
- __u32 desc_idx;
- struct vring_desc *desc;
- struct iovec *iovec, *piov;
- __u8 status;
- __u32 buffer_desc_idx;
- struct virtio_blk_outhdr hdr;
- void *fos;
-
- for (;;) { /* forever */
- if (!open_backend(mic)) { /* No virtblk */
- for (mic->mic_virtblk.signaled = 0;
- !mic->mic_virtblk.signaled;)
- sleep(1);
- continue;
- }
-
- /* backend file is specified. */
- if (!start_virtblk(mic, &vring))
- goto _close_backend;
- iovec = malloc(sizeof(*iovec) *
- le32toh(virtblk_dev_page.blk_config.seg_max));
- if (!iovec) {
- mpsslog("%s: can't alloc iovec: %s\n",
- mic->name, strerror(ENOMEM));
- goto _stop_virtblk;
- }
-
- block_poll.fd = mic->mic_virtblk.virtio_block_fd;
- block_poll.events = POLLIN;
- for (mic->mic_virtblk.signaled = 0;
- !mic->mic_virtblk.signaled;) {
- block_poll.revents = 0;
- /* timeout in 1 sec to see signaled */
- ret = poll(&block_poll, 1, 1000);
- if (ret < 0) {
- mpsslog("%s %d: poll failed: %s\n",
- __func__, __LINE__,
- strerror(errno));
- continue;
- }
-
- if (!(block_poll.revents & POLLIN)) {
-#ifdef DEBUG
- mpsslog("%s %d: block_poll.revents=0x%x\n",
- __func__, __LINE__, block_poll.revents);
-#endif
- continue;
- }
-
- /* POLLIN */
- while (vring.info->avail_idx !=
- le16toh(vring.vr.avail->idx)) {
- /* read header element */
- avail_idx =
- vring.info->avail_idx &
- (vring.vr.num - 1);
- desc_idx = le16toh(
- vring.vr.avail->ring[avail_idx]);
- desc = &vring.vr.desc[desc_idx];
-#ifdef DEBUG
- mpsslog("%s() %d: avail_idx=%d ",
- __func__, __LINE__,
- vring.info->avail_idx);
- mpsslog("vring.vr.num=%d desc=%p\n",
- vring.vr.num, desc);
-#endif
- status = header_error_check(desc);
- ret = read_header(
- mic->mic_virtblk.virtio_block_fd,
- &hdr, desc_idx);
- if (ret < 0) {
- mpsslog("%s() %d %s: ret=%d %s\n",
- __func__, __LINE__,
- mic->name, ret,
- strerror(errno));
- break;
- }
- /* buffer element */
- piov = iovec;
- status = 0;
- fos = mic->mic_virtblk.backend_addr +
- (hdr.sector * SECTOR_SIZE);
- buffer_desc_idx = next_desc(desc);
- desc_idx = buffer_desc_idx;
- for (desc = &vring.vr.desc[buffer_desc_idx];
- desc->flags & VRING_DESC_F_NEXT;
- desc_idx = next_desc(desc),
- desc = &vring.vr.desc[desc_idx]) {
- piov->iov_len = desc->len;
- piov->iov_base = fos;
- piov++;
- fos += desc->len;
- }
- /* Returning NULLs for VIRTIO_BLK_T_GET_ID. */
- if (hdr.type & ~(VIRTIO_BLK_T_OUT |
- VIRTIO_BLK_T_GET_ID)) {
- /*
- VIRTIO_BLK_T_IN - does not do
- anything. Probably for documenting.
- VIRTIO_BLK_T_SCSI_CMD - for
- virtio_scsi.
- VIRTIO_BLK_T_FLUSH - turned off in
- config space.
- VIRTIO_BLK_T_BARRIER - defined but not
- used in anywhere.
- */
- mpsslog("%s() %d: type %x ",
- __func__, __LINE__,
- hdr.type);
- mpsslog("is not supported\n");
- status = -ENOTSUP;
-
- } else {
- ret = transfer_blocks(
- mic->mic_virtblk.virtio_block_fd,
- iovec,
- piov - iovec);
- if (ret < 0 &&
- status != 0)
- status = ret;
- }
- /* write status and update used pointer */
- if (status != 0)
- status = status_error_check(desc);
- ret = write_status(
- mic->mic_virtblk.virtio_block_fd,
- &status);
-#ifdef DEBUG
- mpsslog("%s() %d: write status=%d on desc=%p\n",
- __func__, __LINE__,
- status, desc);
-#endif
- }
- }
- free(iovec);
-_stop_virtblk:
- stop_virtblk(mic);
-_close_backend:
- close_backend(mic);
- } /* forever */
-
- pthread_exit(NULL);
-}
-
-static void
-reset(struct mic_info *mic)
-{
-#define RESET_TIMEOUT 120
- int i = RESET_TIMEOUT;
- setsysfs(mic->name, "state", "reset");
- while (i) {
- char *state;
- state = readsysfs(mic->name, "state");
- if (!state)
- goto retry;
- mpsslog("%s: %s %d state %s\n",
- mic->name, __func__, __LINE__, state);
-
- if (!strcmp(state, "ready")) {
- free(state);
- break;
- }
- free(state);
-retry:
- sleep(1);
- i--;
- }
-}
-
-static int
-get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status)
-{
- if (!strcmp(shutdown_status, "nop"))
- return MIC_NOP;
- if (!strcmp(shutdown_status, "crashed"))
- return MIC_CRASHED;
- if (!strcmp(shutdown_status, "halted"))
- return MIC_HALTED;
- if (!strcmp(shutdown_status, "poweroff"))
- return MIC_POWER_OFF;
- if (!strcmp(shutdown_status, "restart"))
- return MIC_RESTART;
- mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status);
- /* Invalid state */
- assert(0);
-};
-
-static int get_mic_state(struct mic_info *mic)
-{
- char *state = NULL;
- enum mic_states mic_state;
-
- while (!state) {
- state = readsysfs(mic->name, "state");
- sleep(1);
- }
- mpsslog("%s: %s %d state %s\n",
- mic->name, __func__, __LINE__, state);
-
- if (!strcmp(state, "ready")) {
- mic_state = MIC_READY;
- } else if (!strcmp(state, "booting")) {
- mic_state = MIC_BOOTING;
- } else if (!strcmp(state, "online")) {
- mic_state = MIC_ONLINE;
- } else if (!strcmp(state, "shutting_down")) {
- mic_state = MIC_SHUTTING_DOWN;
- } else if (!strcmp(state, "reset_failed")) {
- mic_state = MIC_RESET_FAILED;
- } else if (!strcmp(state, "resetting")) {
- mic_state = MIC_RESETTING;
- } else {
- mpsslog("%s: BUG invalid state %s\n", mic->name, state);
- assert(0);
- }
-
- free(state);
- return mic_state;
-};
-
-static void mic_handle_shutdown(struct mic_info *mic)
-{
-#define SHUTDOWN_TIMEOUT 60
- int i = SHUTDOWN_TIMEOUT;
- char *shutdown_status;
- while (i) {
- shutdown_status = readsysfs(mic->name, "shutdown_status");
- if (!shutdown_status) {
- sleep(1);
- continue;
- }
- mpsslog("%s: %s %d shutdown_status %s\n",
- mic->name, __func__, __LINE__, shutdown_status);
- switch (get_mic_shutdown_status(mic, shutdown_status)) {
- case MIC_RESTART:
- mic->restart = 1;
- case MIC_HALTED:
- case MIC_POWER_OFF:
- case MIC_CRASHED:
- free(shutdown_status);
- goto reset;
- default:
- break;
- }
- free(shutdown_status);
- sleep(1);
- i--;
- }
-reset:
- if (!i)
- mpsslog("%s: %s %d timing out waiting for shutdown_status %s\n",
- mic->name, __func__, __LINE__, shutdown_status);
- reset(mic);
-}
-
-static int open_state_fd(struct mic_info *mic)
-{
- char pathname[PATH_MAX];
- int fd;
-
- snprintf(pathname, PATH_MAX - 1, "%s/%s/%s",
- MICSYSFSDIR, mic->name, "state");
-
- fd = open(pathname, O_RDONLY);
- if (fd < 0)
- mpsslog("%s: opening file %s failed %s\n",
- mic->name, pathname, strerror(errno));
- return fd;
-}
-
-static int block_till_state_change(int fd, struct mic_info *mic)
-{
- struct pollfd ufds[1];
- char value[PAGE_SIZE];
- int ret;
-
- ufds[0].fd = fd;
- ufds[0].events = POLLERR | POLLPRI;
- ret = poll(ufds, 1, -1);
- if (ret < 0) {
- mpsslog("%s: %s %d poll failed %s\n",
- mic->name, __func__, __LINE__, strerror(errno));
- return ret;
- }
-
- ret = lseek(fd, 0, SEEK_SET);
- if (ret < 0) {
- mpsslog("%s: %s %d Failed to seek to 0: %s\n",
- mic->name, __func__, __LINE__, strerror(errno));
- return ret;
- }
-
- ret = read(fd, value, sizeof(value));
- if (ret < 0) {
- mpsslog("%s: %s %d Failed to read sysfs entry: %s\n",
- mic->name, __func__, __LINE__, strerror(errno));
- return ret;
- }
-
- return 0;
-}
-
-static void *
-mic_config(void *arg)
-{
- struct mic_info *mic = (struct mic_info *)arg;
- int fd, ret, stat = 0;
-
- fd = open_state_fd(mic);
- if (fd < 0) {
- mpsslog("%s: %s %d open state fd failed %s\n",
- mic->name, __func__, __LINE__, strerror(errno));
- goto exit;
- }
-
- do {
- ret = block_till_state_change(fd, mic);
- if (ret < 0) {
- mpsslog("%s: %s %d block_till_state_change error %s\n",
- mic->name, __func__, __LINE__, strerror(errno));
- goto close_exit;
- }
-
- switch (get_mic_state(mic)) {
- case MIC_SHUTTING_DOWN:
- mic_handle_shutdown(mic);
- break;
- case MIC_READY:
- case MIC_RESET_FAILED:
- ret = kill(mic->pid, SIGTERM);
- mpsslog("%s: %s %d kill pid %d ret %d\n",
- mic->name, __func__, __LINE__,
- mic->pid, ret);
- if (!ret) {
- ret = waitpid(mic->pid, &stat,
- WIFSIGNALED(stat));
- mpsslog("%s: %s %d waitpid ret %d pid %d\n",
- mic->name, __func__, __LINE__,
- ret, mic->pid);
- }
- if (mic->boot_on_resume) {
- setsysfs(mic->name, "state", "boot");
- mic->boot_on_resume = 0;
- }
- goto close_exit;
- default:
- break;
- }
- } while (1);
-
-close_exit:
- close(fd);
-exit:
- init_mic(mic);
- pthread_exit(NULL);
-}
-
-static void
-set_cmdline(struct mic_info *mic)
-{
- char buffer[PATH_MAX];
- int len;
-
- len = snprintf(buffer, PATH_MAX,
- "clocksource=tsc highres=off nohz=off ");
- len += snprintf(buffer + len, PATH_MAX - len,
- "cpufreq_on;corec6_off;pc3_off;pc6_off ");
- len += snprintf(buffer + len, PATH_MAX - len,
- "ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0",
- mic->id + 1);
-
- setsysfs(mic->name, "cmdline", buffer);
- mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer);
- snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id + 1);
- mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer);
-}
-
-static void
-set_log_buf_info(struct mic_info *mic)
-{
- int fd;
- off_t len;
- char system_map[] = "/lib/firmware/mic/System.map";
- char *map, *temp, log_buf[17] = {'\0'};
-
- fd = open(system_map, O_RDONLY);
- if (fd < 0) {
- mpsslog("%s: Opening System.map failed: %d\n",
- mic->name, errno);
- return;
- }
- len = lseek(fd, 0, SEEK_END);
- if (len < 0) {
- mpsslog("%s: Reading System.map size failed: %d\n",
- mic->name, errno);
- close(fd);
- return;
- }
- map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
- if (map == MAP_FAILED) {
- mpsslog("%s: mmap of System.map failed: %d\n",
- mic->name, errno);
- close(fd);
- return;
- }
- temp = strstr(map, "__log_buf");
- if (!temp) {
- mpsslog("%s: __log_buf not found: %d\n", mic->name, errno);
- munmap(map, len);
- close(fd);
- return;
- }
- strncpy(log_buf, temp - 19, 16);
- setsysfs(mic->name, "log_buf_addr", log_buf);
- mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf);
- temp = strstr(map, "log_buf_len");
- if (!temp) {
- mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno);
- munmap(map, len);
- close(fd);
- return;
- }
- strncpy(log_buf, temp - 19, 16);
- setsysfs(mic->name, "log_buf_len", log_buf);
- mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf);
- munmap(map, len);
- close(fd);
-}
-
-static void
-change_virtblk_backend(int x, siginfo_t *siginfo, void *p)
-{
- struct mic_info *mic;
-
- for (mic = mic_list.next; mic != NULL; mic = mic->next)
- mic->mic_virtblk.signaled = 1/* true */;
-}
-
-static void
-set_mic_boot_params(struct mic_info *mic)
-{
- set_log_buf_info(mic);
- set_cmdline(mic);
-}
-
-static void *
-init_mic(void *arg)
-{
- struct mic_info *mic = (struct mic_info *)arg;
- struct sigaction ignore = {
- .sa_flags = 0,
- .sa_handler = SIG_IGN
- };
- struct sigaction act = {
- .sa_flags = SA_SIGINFO,
- .sa_sigaction = change_virtblk_backend,
- };
- char buffer[PATH_MAX];
- int err, fd;
-
- /*
- * Currently, one virtio block device is supported for each MIC card
- * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon.
- * The signal informs the virtio block backend about a change in the
- * configuration file which specifies the virtio backend file name on
- * the host. Virtio block backend then re-reads the configuration file
- * and switches to the new block device. This signalling mechanism may
- * not be required once multiple virtio block devices are supported by
- * the MIC daemon.
- */
- sigaction(SIGUSR1, &ignore, NULL);
-retry:
- fd = open_state_fd(mic);
- if (fd < 0) {
- mpsslog("%s: %s %d open state fd failed %s\n",
- mic->name, __func__, __LINE__, strerror(errno));
- sleep(2);
- goto retry;
- }
-
- if (mic->restart) {
- snprintf(buffer, PATH_MAX, "boot");
- setsysfs(mic->name, "state", buffer);
- mpsslog("%s restarting mic %d\n",
- mic->name, mic->restart);
- mic->restart = 0;
- }
-
- while (1) {
- while (block_till_state_change(fd, mic)) {
- mpsslog("%s: %s %d block_till_state_change error %s\n",
- mic->name, __func__, __LINE__, strerror(errno));
- sleep(2);
- continue;
- }
-
- if (get_mic_state(mic) == MIC_BOOTING)
- break;
- }
-
- mic->pid = fork();
- switch (mic->pid) {
- case 0:
- add_virtio_device(mic, &virtcons_dev_page.dd);
- add_virtio_device(mic, &virtnet_dev_page.dd);
- err = pthread_create(&mic->mic_console.console_thread, NULL,
- virtio_console, mic);
- if (err)
- mpsslog("%s virtcons pthread_create failed %s\n",
- mic->name, strerror(err));
- err = pthread_create(&mic->mic_net.net_thread, NULL,
- virtio_net, mic);
- if (err)
- mpsslog("%s virtnet pthread_create failed %s\n",
- mic->name, strerror(err));
- err = pthread_create(&mic->mic_virtblk.block_thread, NULL,
- virtio_block, mic);
- if (err)
- mpsslog("%s virtblk pthread_create failed %s\n",
- mic->name, strerror(err));
- sigemptyset(&act.sa_mask);
- err = sigaction(SIGUSR1, &act, NULL);
- if (err)
- mpsslog("%s sigaction SIGUSR1 failed %s\n",
- mic->name, strerror(errno));
- while (1)
- sleep(60);
- case -1:
- mpsslog("fork failed MIC name %s id %d errno %d\n",
- mic->name, mic->id, errno);
- break;
- default:
- err = pthread_create(&mic->config_thread, NULL,
- mic_config, mic);
- if (err)
- mpsslog("%s mic_config pthread_create failed %s\n",
- mic->name, strerror(err));
- }
-
- return NULL;
-}
-
-static void
-start_daemon(void)
-{
- struct mic_info *mic;
- int err;
-
- for (mic = mic_list.next; mic; mic = mic->next) {
- set_mic_boot_params(mic);
- err = pthread_create(&mic->init_thread, NULL, init_mic, mic);
- if (err)
- mpsslog("%s init_mic pthread_create failed %s\n",
- mic->name, strerror(err));
- }
-
- while (1)
- sleep(60);
-}
-
-static int
-init_mic_list(void)
-{
- struct mic_info *mic = &mic_list;
- struct dirent *file;
- DIR *dp;
- int cnt = 0;
-
- dp = opendir(MICSYSFSDIR);
- if (!dp)
- return 0;
-
- while ((file = readdir(dp)) != NULL) {
- if (!strncmp(file->d_name, "mic", 3)) {
- mic->next = calloc(1, sizeof(struct mic_info));
- if (mic->next) {
- mic = mic->next;
- mic->id = atoi(&file->d_name[3]);
- mic->name = malloc(strlen(file->d_name) + 16);
- if (mic->name)
- strcpy(mic->name, file->d_name);
- mpsslog("MIC name %s id %d\n", mic->name,
- mic->id);
- cnt++;
- }
- }
- }
-
- closedir(dp);
- return cnt;
-}
-
-void
-mpsslog(char *format, ...)
-{
- va_list args;
- char buffer[4096];
- char ts[52], *ts1;
- time_t t;
-
- if (logfp == NULL)
- return;
-
- va_start(args, format);
- vsprintf(buffer, format, args);
- va_end(args);
-
- time(&t);
- ts1 = ctime_r(&t, ts);
- ts1[strlen(ts1) - 1] = '\0';
- fprintf(logfp, "%s: %s", ts1, buffer);
-
- fflush(logfp);
-}
-
-int
-main(int argc, char *argv[])
-{
- int cnt;
- pid_t pid;
-
- myname = argv[0];
-
- logfp = fopen(LOGFILE_NAME, "a+");
- if (!logfp) {
- fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME);
- exit(1);
- }
- pid = fork();
- switch (pid) {
- case 0:
- break;
- case -1:
- exit(2);
- default:
- exit(0);
- }
-
- mpsslog("MIC Daemon start\n");
-
- cnt = init_mic_list();
- if (cnt == 0) {
- mpsslog("MIC module not loaded\n");
- exit(3);
- }
- mpsslog("MIC found %d devices\n", cnt);
-
- start_daemon();
-
- exit(0);
-}
diff --git a/samples/mic/mpssd/mpssd.h b/samples/mic/mpssd/mpssd.h
deleted file mode 100644
index 5f98bdafe653..000000000000
--- a/samples/mic/mpssd/mpssd.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2013 Intel Corporation.
- *
- * Intel MIC User Space Tools.
- */
-#ifndef _MPSSD_H_
-#define _MPSSD_H_
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <dirent.h>
-#include <libgen.h>
-#include <pthread.h>
-#include <stdarg.h>
-#include <time.h>
-#include <errno.h>
-#include <sys/dir.h>
-#include <sys/ioctl.h>
-#include <sys/poll.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-#include <sys/utsname.h>
-#include <sys/wait.h>
-#include <netinet/in.h>
-#include <arpa/inet.h>
-#include <netdb.h>
-#include <signal.h>
-#include <limits.h>
-#include <syslog.h>
-#include <getopt.h>
-#include <net/if.h>
-#include <linux/if_tun.h>
-#include <linux/virtio_ids.h>
-
-#define MICSYSFSDIR "/sys/class/mic"
-#define LOGFILE_NAME "/var/log/mpssd"
-#define PAGE_SIZE 4096
-
-struct mic_console_info {
- pthread_t console_thread;
- int virtio_console_fd;
- void *console_dp;
-};
-
-struct mic_net_info {
- pthread_t net_thread;
- int virtio_net_fd;
- int tap_fd;
- void *net_dp;
-};
-
-struct mic_virtblk_info {
- pthread_t block_thread;
- int virtio_block_fd;
- void *block_dp;
- volatile sig_atomic_t signaled;
- char *backend_file;
- int backend;
- void *backend_addr;
- long backend_size;
-};
-
-struct mic_info {
- int id;
- char *name;
- pthread_t config_thread;
- pthread_t init_thread;
- pid_t pid;
- struct mic_console_info mic_console;
- struct mic_net_info mic_net;
- struct mic_virtblk_info mic_virtblk;
- int restart;
- int boot_on_resume;
- struct mic_info *next;
-};
-
-__attribute__((format(printf, 1, 2)))
-void mpsslog(char *format, ...);
-char *readsysfs(char *dir, char *entry);
-int setsysfs(char *dir, char *entry, char *value);
-#endif
diff --git a/samples/mic/mpssd/sysfs.c b/samples/mic/mpssd/sysfs.c
deleted file mode 100644
index 3fb08eb7ed9d..000000000000
--- a/samples/mic/mpssd/sysfs.c
+++ /dev/null
@@ -1,91 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2013 Intel Corporation.
- *
- * Intel MIC User Space Tools.
- */
-
-#include "mpssd.h"
-
-#define PAGE_SIZE 4096
-
-char *
-readsysfs(char *dir, char *entry)
-{
- char filename[PATH_MAX];
- char value[PAGE_SIZE];
- char *string = NULL;
- int fd;
- int len;
-
- if (dir == NULL)
- snprintf(filename, PATH_MAX, "%s/%s", MICSYSFSDIR, entry);
- else
- snprintf(filename, PATH_MAX,
- "%s/%s/%s", MICSYSFSDIR, dir, entry);
-
- fd = open(filename, O_RDONLY);
- if (fd < 0) {
- mpsslog("Failed to open sysfs entry '%s': %s\n",
- filename, strerror(errno));
- return NULL;
- }
-
- len = read(fd, value, sizeof(value));
- if (len < 0) {
- mpsslog("Failed to read sysfs entry '%s': %s\n",
- filename, strerror(errno));
- goto readsys_ret;
- }
- if (len == 0)
- goto readsys_ret;
-
- value[len - 1] = '\0';
-
- string = malloc(strlen(value) + 1);
- if (string)
- strcpy(string, value);
-
-readsys_ret:
- close(fd);
- return string;
-}
-
-int
-setsysfs(char *dir, char *entry, char *value)
-{
- char filename[PATH_MAX];
- char *oldvalue;
- int fd, ret = 0;
-
- if (dir == NULL)
- snprintf(filename, PATH_MAX, "%s/%s", MICSYSFSDIR, entry);
- else
- snprintf(filename, PATH_MAX, "%s/%s/%s",
- MICSYSFSDIR, dir, entry);
-
- oldvalue = readsysfs(dir, entry);
-
- fd = open(filename, O_RDWR);
- if (fd < 0) {
- ret = errno;
- mpsslog("Failed to open sysfs entry '%s': %s\n",
- filename, strerror(errno));
- goto done;
- }
-
- if (!oldvalue || strcmp(value, oldvalue)) {
- if (write(fd, value, strlen(value)) < 0) {
- ret = errno;
- mpsslog("Failed to write new sysfs entry '%s': %s\n",
- filename, strerror(errno));
- }
- }
- close(fd);
-done:
- if (oldvalue)
- free(oldvalue);
- return ret;
-}
diff --git a/samples/nitro_enclaves/.gitignore b/samples/nitro_enclaves/.gitignore
new file mode 100644
index 000000000000..6a718eec71f4
--- /dev/null
+++ b/samples/nitro_enclaves/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+/ne_ioctl_sample
diff --git a/samples/nitro_enclaves/Makefile b/samples/nitro_enclaves/Makefile
new file mode 100644
index 000000000000..a3ec78fefb52
--- /dev/null
+++ b/samples/nitro_enclaves/Makefile
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+
+# Enclave lifetime management support for Nitro Enclaves (NE) - ioctl sample
+# usage.
+
+.PHONY: all clean
+
+CFLAGS += -Wall
+
+all:
+ $(CC) $(CFLAGS) -o ne_ioctl_sample ne_ioctl_sample.c -lpthread
+
+clean:
+ rm -f ne_ioctl_sample
diff --git a/samples/nitro_enclaves/ne_ioctl_sample.c b/samples/nitro_enclaves/ne_ioctl_sample.c
new file mode 100644
index 000000000000..765b131c7319
--- /dev/null
+++ b/samples/nitro_enclaves/ne_ioctl_sample.c
@@ -0,0 +1,882 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2020-2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ */
+
+/**
+ * DOC: Sample flow of using the ioctl interface provided by the Nitro Enclaves (NE)
+ * kernel driver.
+ *
+ * Usage
+ * -----
+ *
+ * Load the nitro_enclaves module, setting also the enclave CPU pool. The
+ * enclave CPUs need to be full cores from the same NUMA node. CPU 0 and its
+ * siblings have to remain available for the primary / parent VM, so they
+ * cannot be included in the enclave CPU pool.
+ *
+ * See the cpu list section from the kernel documentation.
+ * https://www.kernel.org/doc/html/latest/admin-guide/kernel-parameters.html#cpu-lists
+ *
+ * insmod drivers/virt/nitro_enclaves/nitro_enclaves.ko
+ * lsmod
+ *
+ * The CPU pool can be set at runtime, after the kernel module is loaded.
+ *
+ * echo <cpu-list> > /sys/module/nitro_enclaves/parameters/ne_cpus
+ *
+ * NUMA and CPU siblings information can be found using:
+ *
+ * lscpu
+ * /proc/cpuinfo
+ *
+ * Check the online / offline CPU list. The CPUs from the pool should be
+ * offlined.
+ *
+ * lscpu
+ *
+ * Check dmesg for any warnings / errors through the NE driver lifetime / usage.
+ * The NE logs contain the "nitro_enclaves" or "pci 0000:00:02.0" pattern.
+ *
+ * dmesg
+ *
+ * Setup hugetlbfs huge pages. The memory needs to be from the same NUMA node as
+ * the enclave CPUs.
+ *
+ * https://www.kernel.org/doc/html/latest/admin-guide/mm/hugetlbpage.html
+ *
+ * By default, the allocation of hugetlb pages are distributed on all possible
+ * NUMA nodes. Use the following configuration files to set the number of huge
+ * pages from a NUMA node:
+ *
+ * /sys/devices/system/node/node<X>/hugepages/hugepages-2048kB/nr_hugepages
+ * /sys/devices/system/node/node<X>/hugepages/hugepages-1048576kB/nr_hugepages
+ *
+ * or, if not on a system with multiple NUMA nodes, can also set the number
+ * of 2 MiB / 1 GiB huge pages using
+ *
+ * /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
+ * /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages
+ *
+ * In this example 256 hugepages of 2 MiB are used.
+ *
+ * Build and run the NE sample.
+ *
+ * make -C samples/nitro_enclaves clean
+ * make -C samples/nitro_enclaves
+ * ./samples/nitro_enclaves/ne_ioctl_sample <path_to_enclave_image>
+ *
+ * Unload the nitro_enclaves module.
+ *
+ * rmmod nitro_enclaves
+ * lsmod
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <poll.h>
+#include <pthread.h>
+#include <string.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <linux/mman.h>
+#include <linux/nitro_enclaves.h>
+#include <linux/vm_sockets.h>
+
+/**
+ * NE_DEV_NAME - Nitro Enclaves (NE) misc device that provides the ioctl interface.
+ */
+#define NE_DEV_NAME "/dev/nitro_enclaves"
+
+/**
+ * NE_POLL_WAIT_TIME - Timeout in seconds for each poll event.
+ */
+#define NE_POLL_WAIT_TIME (60)
+/**
+ * NE_POLL_WAIT_TIME_MS - Timeout in milliseconds for each poll event.
+ */
+#define NE_POLL_WAIT_TIME_MS (NE_POLL_WAIT_TIME * 1000)
+
+/**
+ * NE_SLEEP_TIME - Amount of time in seconds for the process to keep the enclave alive.
+ */
+#define NE_SLEEP_TIME (300)
+
+/**
+ * NE_DEFAULT_NR_VCPUS - Default number of vCPUs set for an enclave.
+ */
+#define NE_DEFAULT_NR_VCPUS (2)
+
+/**
+ * NE_MIN_MEM_REGION_SIZE - Minimum size of a memory region - 2 MiB.
+ */
+#define NE_MIN_MEM_REGION_SIZE (2 * 1024 * 1024)
+
+/**
+ * NE_DEFAULT_NR_MEM_REGIONS - Default number of memory regions of 2 MiB set for
+ * an enclave.
+ */
+#define NE_DEFAULT_NR_MEM_REGIONS (256)
+
+/**
+ * NE_IMAGE_LOAD_HEARTBEAT_CID - Vsock CID for enclave image loading heartbeat logic.
+ */
+#define NE_IMAGE_LOAD_HEARTBEAT_CID (3)
+/**
+ * NE_IMAGE_LOAD_HEARTBEAT_PORT - Vsock port for enclave image loading heartbeat logic.
+ */
+#define NE_IMAGE_LOAD_HEARTBEAT_PORT (9000)
+/**
+ * NE_IMAGE_LOAD_HEARTBEAT_VALUE - Heartbeat value for enclave image loading.
+ */
+#define NE_IMAGE_LOAD_HEARTBEAT_VALUE (0xb7)
+
+/**
+ * struct ne_user_mem_region - User space memory region set for an enclave.
+ * @userspace_addr: Address of the user space memory region.
+ * @memory_size: Size of the user space memory region.
+ */
+struct ne_user_mem_region {
+ void *userspace_addr;
+ size_t memory_size;
+};
+
+/**
+ * ne_create_vm() - Create a slot for the enclave VM.
+ * @ne_dev_fd: The file descriptor of the NE misc device.
+ * @slot_uid: The generated slot uid for the enclave.
+ * @enclave_fd : The generated file descriptor for the enclave.
+ *
+ * Context: Process context.
+ * Return:
+ * * 0 on success.
+ * * Negative return value on failure.
+ */
+static int ne_create_vm(int ne_dev_fd, unsigned long *slot_uid, int *enclave_fd)
+{
+ int rc = -EINVAL;
+ *enclave_fd = ioctl(ne_dev_fd, NE_CREATE_VM, slot_uid);
+
+ if (*enclave_fd < 0) {
+ rc = *enclave_fd;
+ switch (errno) {
+ case NE_ERR_NO_CPUS_AVAIL_IN_POOL: {
+ printf("Error in create VM, no CPUs available in the NE CPU pool\n");
+
+ break;
+ }
+
+ default:
+ printf("Error in create VM [%m]\n");
+ }
+
+ return rc;
+ }
+
+ return 0;
+}
+
+/**
+ * ne_poll_enclave_fd() - Thread function for polling the enclave fd.
+ * @data: Argument provided for the polling function.
+ *
+ * Context: Process context.
+ * Return:
+ * * NULL on success / failure.
+ */
+void *ne_poll_enclave_fd(void *data)
+{
+ int enclave_fd = *(int *)data;
+ struct pollfd fds[1] = {};
+ int i = 0;
+ int rc = -EINVAL;
+
+ printf("Running from poll thread, enclave fd %d\n", enclave_fd);
+
+ fds[0].fd = enclave_fd;
+ fds[0].events = POLLIN | POLLERR | POLLHUP;
+
+ /* Keep on polling until the current process is terminated. */
+ while (1) {
+ printf("[iter %d] Polling ...\n", i);
+
+ rc = poll(fds, 1, NE_POLL_WAIT_TIME_MS);
+ if (rc < 0) {
+ printf("Error in poll [%m]\n");
+
+ return NULL;
+ }
+
+ i++;
+
+ if (!rc) {
+ printf("Poll: %d seconds elapsed\n",
+ i * NE_POLL_WAIT_TIME);
+
+ continue;
+ }
+
+ printf("Poll received value 0x%x\n", fds[0].revents);
+
+ if (fds[0].revents & POLLHUP) {
+ printf("Received POLLHUP\n");
+
+ return NULL;
+ }
+
+ if (fds[0].revents & POLLNVAL) {
+ printf("Received POLLNVAL\n");
+
+ return NULL;
+ }
+ }
+
+ return NULL;
+}
+
+/**
+ * ne_alloc_user_mem_region() - Allocate a user space memory region for an enclave.
+ * @ne_user_mem_region: User space memory region allocated using hugetlbfs.
+ *
+ * Context: Process context.
+ * Return:
+ * * 0 on success.
+ * * Negative return value on failure.
+ */
+static int ne_alloc_user_mem_region(struct ne_user_mem_region *ne_user_mem_region)
+{
+ /**
+ * Check available hugetlb encodings for different huge page sizes in
+ * include/uapi/linux/mman.h.
+ */
+ ne_user_mem_region->userspace_addr = mmap(NULL, ne_user_mem_region->memory_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS |
+ MAP_HUGETLB | MAP_HUGE_2MB, -1, 0);
+ if (ne_user_mem_region->userspace_addr == MAP_FAILED) {
+ printf("Error in mmap memory [%m]\n");
+
+ return -1;
+ }
+
+ return 0;
+}
+
+/**
+ * ne_load_enclave_image() - Place the enclave image in the enclave memory.
+ * @enclave_fd : The file descriptor associated with the enclave.
+ * @ne_user_mem_regions: User space memory regions allocated for the enclave.
+ * @enclave_image_path : The file path of the enclave image.
+ *
+ * Context: Process context.
+ * Return:
+ * * 0 on success.
+ * * Negative return value on failure.
+ */
+static int ne_load_enclave_image(int enclave_fd, struct ne_user_mem_region ne_user_mem_regions[],
+ char *enclave_image_path)
+{
+ unsigned char *enclave_image = NULL;
+ int enclave_image_fd = -1;
+ size_t enclave_image_size = 0;
+ size_t enclave_memory_size = 0;
+ unsigned long i = 0;
+ size_t image_written_bytes = 0;
+ struct ne_image_load_info image_load_info = {
+ .flags = NE_EIF_IMAGE,
+ };
+ struct stat image_stat_buf = {};
+ int rc = -EINVAL;
+ size_t temp_image_offset = 0;
+
+ for (i = 0; i < NE_DEFAULT_NR_MEM_REGIONS; i++)
+ enclave_memory_size += ne_user_mem_regions[i].memory_size;
+
+ rc = stat(enclave_image_path, &image_stat_buf);
+ if (rc < 0) {
+ printf("Error in get image stat info [%m]\n");
+
+ return rc;
+ }
+
+ enclave_image_size = image_stat_buf.st_size;
+
+ if (enclave_memory_size < enclave_image_size) {
+ printf("The enclave memory is smaller than the enclave image size\n");
+
+ return -ENOMEM;
+ }
+
+ rc = ioctl(enclave_fd, NE_GET_IMAGE_LOAD_INFO, &image_load_info);
+ if (rc < 0) {
+ switch (errno) {
+ case NE_ERR_NOT_IN_INIT_STATE: {
+ printf("Error in get image load info, enclave not in init state\n");
+
+ break;
+ }
+
+ case NE_ERR_INVALID_FLAG_VALUE: {
+ printf("Error in get image load info, provided invalid flag\n");
+
+ break;
+ }
+
+ default:
+ printf("Error in get image load info [%m]\n");
+ }
+
+ return rc;
+ }
+
+ printf("Enclave image offset in enclave memory is %lld\n",
+ image_load_info.memory_offset);
+
+ enclave_image_fd = open(enclave_image_path, O_RDONLY);
+ if (enclave_image_fd < 0) {
+ printf("Error in open enclave image file [%m]\n");
+
+ return enclave_image_fd;
+ }
+
+ enclave_image = mmap(NULL, enclave_image_size, PROT_READ,
+ MAP_PRIVATE, enclave_image_fd, 0);
+ if (enclave_image == MAP_FAILED) {
+ printf("Error in mmap enclave image [%m]\n");
+
+ return -1;
+ }
+
+ temp_image_offset = image_load_info.memory_offset;
+
+ for (i = 0; i < NE_DEFAULT_NR_MEM_REGIONS; i++) {
+ size_t bytes_to_write = 0;
+ size_t memory_offset = 0;
+ size_t memory_size = ne_user_mem_regions[i].memory_size;
+ size_t remaining_bytes = 0;
+ void *userspace_addr = ne_user_mem_regions[i].userspace_addr;
+
+ if (temp_image_offset >= memory_size) {
+ temp_image_offset -= memory_size;
+
+ continue;
+ } else if (temp_image_offset != 0) {
+ memory_offset = temp_image_offset;
+ memory_size -= temp_image_offset;
+ temp_image_offset = 0;
+ }
+
+ remaining_bytes = enclave_image_size - image_written_bytes;
+ bytes_to_write = memory_size < remaining_bytes ?
+ memory_size : remaining_bytes;
+
+ memcpy(userspace_addr + memory_offset,
+ enclave_image + image_written_bytes, bytes_to_write);
+
+ image_written_bytes += bytes_to_write;
+
+ if (image_written_bytes == enclave_image_size)
+ break;
+ }
+
+ munmap(enclave_image, enclave_image_size);
+
+ close(enclave_image_fd);
+
+ return 0;
+}
+
+/**
+ * ne_set_user_mem_region() - Set a user space memory region for the given enclave.
+ * @enclave_fd : The file descriptor associated with the enclave.
+ * @ne_user_mem_region : User space memory region to be set for the enclave.
+ *
+ * Context: Process context.
+ * Return:
+ * * 0 on success.
+ * * Negative return value on failure.
+ */
+static int ne_set_user_mem_region(int enclave_fd, struct ne_user_mem_region ne_user_mem_region)
+{
+ struct ne_user_memory_region mem_region = {
+ .flags = NE_DEFAULT_MEMORY_REGION,
+ .memory_size = ne_user_mem_region.memory_size,
+ .userspace_addr = (__u64)ne_user_mem_region.userspace_addr,
+ };
+ int rc = -EINVAL;
+
+ rc = ioctl(enclave_fd, NE_SET_USER_MEMORY_REGION, &mem_region);
+ if (rc < 0) {
+ switch (errno) {
+ case NE_ERR_NOT_IN_INIT_STATE: {
+ printf("Error in set user memory region, enclave not in init state\n");
+
+ break;
+ }
+
+ case NE_ERR_INVALID_MEM_REGION_SIZE: {
+ printf("Error in set user memory region, mem size not multiple of 2 MiB\n");
+
+ break;
+ }
+
+ case NE_ERR_INVALID_MEM_REGION_ADDR: {
+ printf("Error in set user memory region, invalid user space address\n");
+
+ break;
+ }
+
+ case NE_ERR_UNALIGNED_MEM_REGION_ADDR: {
+ printf("Error in set user memory region, unaligned user space address\n");
+
+ break;
+ }
+
+ case NE_ERR_MEM_REGION_ALREADY_USED: {
+ printf("Error in set user memory region, memory region already used\n");
+
+ break;
+ }
+
+ case NE_ERR_MEM_NOT_HUGE_PAGE: {
+ printf("Error in set user memory region, not backed by huge pages\n");
+
+ break;
+ }
+
+ case NE_ERR_MEM_DIFFERENT_NUMA_NODE: {
+ printf("Error in set user memory region, different NUMA node than CPUs\n");
+
+ break;
+ }
+
+ case NE_ERR_MEM_MAX_REGIONS: {
+ printf("Error in set user memory region, max memory regions reached\n");
+
+ break;
+ }
+
+ case NE_ERR_INVALID_PAGE_SIZE: {
+ printf("Error in set user memory region, has page not multiple of 2 MiB\n");
+
+ break;
+ }
+
+ case NE_ERR_INVALID_FLAG_VALUE: {
+ printf("Error in set user memory region, provided invalid flag\n");
+
+ break;
+ }
+
+ default:
+ printf("Error in set user memory region [%m]\n");
+ }
+
+ return rc;
+ }
+
+ return 0;
+}
+
+/**
+ * ne_free_mem_regions() - Unmap all the user space memory regions that were set
+ * aside for the enclave.
+ * @ne_user_mem_regions: The user space memory regions associated with an enclave.
+ *
+ * Context: Process context.
+ */
+static void ne_free_mem_regions(struct ne_user_mem_region ne_user_mem_regions[])
+{
+ unsigned int i = 0;
+
+ for (i = 0; i < NE_DEFAULT_NR_MEM_REGIONS; i++)
+ munmap(ne_user_mem_regions[i].userspace_addr,
+ ne_user_mem_regions[i].memory_size);
+}
+
+/**
+ * ne_add_vcpu() - Add a vCPU to the given enclave.
+ * @enclave_fd : The file descriptor associated with the enclave.
+ * @vcpu_id: vCPU id to be set for the enclave, either provided or
+ * auto-generated (if provided vCPU id is 0).
+ *
+ * Context: Process context.
+ * Return:
+ * * 0 on success.
+ * * Negative return value on failure.
+ */
+static int ne_add_vcpu(int enclave_fd, unsigned int *vcpu_id)
+{
+ int rc = -EINVAL;
+
+ rc = ioctl(enclave_fd, NE_ADD_VCPU, vcpu_id);
+ if (rc < 0) {
+ switch (errno) {
+ case NE_ERR_NO_CPUS_AVAIL_IN_POOL: {
+ printf("Error in add vcpu, no CPUs available in the NE CPU pool\n");
+
+ break;
+ }
+
+ case NE_ERR_VCPU_ALREADY_USED: {
+ printf("Error in add vcpu, the provided vCPU is already used\n");
+
+ break;
+ }
+
+ case NE_ERR_VCPU_NOT_IN_CPU_POOL: {
+ printf("Error in add vcpu, the provided vCPU is not in the NE CPU pool\n");
+
+ break;
+ }
+
+ case NE_ERR_VCPU_INVALID_CPU_CORE: {
+ printf("Error in add vcpu, the core id of the provided vCPU is invalid\n");
+
+ break;
+ }
+
+ case NE_ERR_NOT_IN_INIT_STATE: {
+ printf("Error in add vcpu, enclave not in init state\n");
+
+ break;
+ }
+
+ case NE_ERR_INVALID_VCPU: {
+ printf("Error in add vcpu, the provided vCPU is out of avail CPUs range\n");
+
+ break;
+ }
+
+ default:
+ printf("Error in add vcpu [%m]\n");
+ }
+
+ return rc;
+ }
+
+ return 0;
+}
+
+/**
+ * ne_start_enclave() - Start the given enclave.
+ * @enclave_fd : The file descriptor associated with the enclave.
+ * @enclave_start_info : Enclave metadata used for starting e.g. vsock CID.
+ *
+ * Context: Process context.
+ * Return:
+ * * 0 on success.
+ * * Negative return value on failure.
+ */
+static int ne_start_enclave(int enclave_fd, struct ne_enclave_start_info *enclave_start_info)
+{
+ int rc = -EINVAL;
+
+ rc = ioctl(enclave_fd, NE_START_ENCLAVE, enclave_start_info);
+ if (rc < 0) {
+ switch (errno) {
+ case NE_ERR_NOT_IN_INIT_STATE: {
+ printf("Error in start enclave, enclave not in init state\n");
+
+ break;
+ }
+
+ case NE_ERR_NO_MEM_REGIONS_ADDED: {
+ printf("Error in start enclave, no memory regions have been added\n");
+
+ break;
+ }
+
+ case NE_ERR_NO_VCPUS_ADDED: {
+ printf("Error in start enclave, no vCPUs have been added\n");
+
+ break;
+ }
+
+ case NE_ERR_FULL_CORES_NOT_USED: {
+ printf("Error in start enclave, enclave has no full cores set\n");
+
+ break;
+ }
+
+ case NE_ERR_ENCLAVE_MEM_MIN_SIZE: {
+ printf("Error in start enclave, enclave memory is less than min size\n");
+
+ break;
+ }
+
+ case NE_ERR_INVALID_FLAG_VALUE: {
+ printf("Error in start enclave, provided invalid flag\n");
+
+ break;
+ }
+
+ case NE_ERR_INVALID_ENCLAVE_CID: {
+ printf("Error in start enclave, provided invalid enclave CID\n");
+
+ break;
+ }
+
+ default:
+ printf("Error in start enclave [%m]\n");
+ }
+
+ return rc;
+ }
+
+ return 0;
+}
+
+/**
+ * ne_start_enclave_check_booted() - Start the enclave and wait for a heartbeat
+ * from it, on a newly created vsock channel,
+ * to check it has booted.
+ * @enclave_fd : The file descriptor associated with the enclave.
+ *
+ * Context: Process context.
+ * Return:
+ * * 0 on success.
+ * * Negative return value on failure.
+ */
+static int ne_start_enclave_check_booted(int enclave_fd)
+{
+ struct sockaddr_vm client_vsock_addr = {};
+ int client_vsock_fd = -1;
+ socklen_t client_vsock_len = sizeof(client_vsock_addr);
+ struct ne_enclave_start_info enclave_start_info = {};
+ struct pollfd fds[1] = {};
+ int rc = -EINVAL;
+ unsigned char recv_buf = 0;
+ struct sockaddr_vm server_vsock_addr = {
+ .svm_family = AF_VSOCK,
+ .svm_cid = NE_IMAGE_LOAD_HEARTBEAT_CID,
+ .svm_port = NE_IMAGE_LOAD_HEARTBEAT_PORT,
+ };
+ int server_vsock_fd = -1;
+
+ server_vsock_fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+ if (server_vsock_fd < 0) {
+ rc = server_vsock_fd;
+
+ printf("Error in socket [%m]\n");
+
+ return rc;
+ }
+
+ rc = bind(server_vsock_fd, (struct sockaddr *)&server_vsock_addr,
+ sizeof(server_vsock_addr));
+ if (rc < 0) {
+ printf("Error in bind [%m]\n");
+
+ goto out;
+ }
+
+ rc = listen(server_vsock_fd, 1);
+ if (rc < 0) {
+ printf("Error in listen [%m]\n");
+
+ goto out;
+ }
+
+ rc = ne_start_enclave(enclave_fd, &enclave_start_info);
+ if (rc < 0)
+ goto out;
+
+ printf("Enclave started, CID %llu\n", enclave_start_info.enclave_cid);
+
+ fds[0].fd = server_vsock_fd;
+ fds[0].events = POLLIN;
+
+ rc = poll(fds, 1, NE_POLL_WAIT_TIME_MS);
+ if (rc < 0) {
+ printf("Error in poll [%m]\n");
+
+ goto out;
+ }
+
+ if (!rc) {
+ printf("Poll timeout, %d seconds elapsed\n", NE_POLL_WAIT_TIME);
+
+ rc = -ETIMEDOUT;
+
+ goto out;
+ }
+
+ if ((fds[0].revents & POLLIN) == 0) {
+ printf("Poll received value %d\n", fds[0].revents);
+
+ rc = -EINVAL;
+
+ goto out;
+ }
+
+ rc = accept(server_vsock_fd, (struct sockaddr *)&client_vsock_addr,
+ &client_vsock_len);
+ if (rc < 0) {
+ printf("Error in accept [%m]\n");
+
+ goto out;
+ }
+
+ client_vsock_fd = rc;
+
+ /*
+ * Read the heartbeat value that the init process in the enclave sends
+ * after vsock connect.
+ */
+ rc = read(client_vsock_fd, &recv_buf, sizeof(recv_buf));
+ if (rc < 0) {
+ printf("Error in read [%m]\n");
+
+ goto out;
+ }
+
+ if (rc != sizeof(recv_buf) || recv_buf != NE_IMAGE_LOAD_HEARTBEAT_VALUE) {
+ printf("Read %d instead of %d\n", recv_buf,
+ NE_IMAGE_LOAD_HEARTBEAT_VALUE);
+
+ goto out;
+ }
+
+ /* Write the heartbeat value back. */
+ rc = write(client_vsock_fd, &recv_buf, sizeof(recv_buf));
+ if (rc < 0) {
+ printf("Error in write [%m]\n");
+
+ goto out;
+ }
+
+ rc = 0;
+
+out:
+ close(server_vsock_fd);
+
+ return rc;
+}
+
+int main(int argc, char *argv[])
+{
+ int enclave_fd = -1;
+ unsigned int i = 0;
+ int ne_dev_fd = -1;
+ struct ne_user_mem_region ne_user_mem_regions[NE_DEFAULT_NR_MEM_REGIONS] = {};
+ unsigned int ne_vcpus[NE_DEFAULT_NR_VCPUS] = {};
+ int rc = -EINVAL;
+ pthread_t thread_id = 0;
+ unsigned long slot_uid = 0;
+
+ if (argc != 2) {
+ printf("Usage: %s <path_to_enclave_image>\n", argv[0]);
+
+ exit(EXIT_FAILURE);
+ }
+
+ if (strlen(argv[1]) >= PATH_MAX) {
+ printf("The size of the path to enclave image is higher than max path\n");
+
+ exit(EXIT_FAILURE);
+ }
+
+ ne_dev_fd = open(NE_DEV_NAME, O_RDWR | O_CLOEXEC);
+ if (ne_dev_fd < 0) {
+ printf("Error in open NE device [%m]\n");
+
+ exit(EXIT_FAILURE);
+ }
+
+ printf("Creating enclave slot ...\n");
+
+ rc = ne_create_vm(ne_dev_fd, &slot_uid, &enclave_fd);
+
+ close(ne_dev_fd);
+
+ if (rc < 0)
+ exit(EXIT_FAILURE);
+
+ printf("Enclave fd %d\n", enclave_fd);
+
+ rc = pthread_create(&thread_id, NULL, ne_poll_enclave_fd, (void *)&enclave_fd);
+ if (rc < 0) {
+ printf("Error in thread create [%m]\n");
+
+ close(enclave_fd);
+
+ exit(EXIT_FAILURE);
+ }
+
+ for (i = 0; i < NE_DEFAULT_NR_MEM_REGIONS; i++) {
+ ne_user_mem_regions[i].memory_size = NE_MIN_MEM_REGION_SIZE;
+
+ rc = ne_alloc_user_mem_region(&ne_user_mem_regions[i]);
+ if (rc < 0) {
+ printf("Error in alloc userspace memory region, iter %d\n", i);
+
+ goto release_enclave_fd;
+ }
+ }
+
+ rc = ne_load_enclave_image(enclave_fd, ne_user_mem_regions, argv[1]);
+ if (rc < 0)
+ goto release_enclave_fd;
+
+ for (i = 0; i < NE_DEFAULT_NR_MEM_REGIONS; i++) {
+ rc = ne_set_user_mem_region(enclave_fd, ne_user_mem_regions[i]);
+ if (rc < 0) {
+ printf("Error in set memory region, iter %d\n", i);
+
+ goto release_enclave_fd;
+ }
+ }
+
+ printf("Enclave memory regions were added\n");
+
+ for (i = 0; i < NE_DEFAULT_NR_VCPUS; i++) {
+ /*
+ * The vCPU is chosen from the enclave vCPU pool, if the value
+ * of the vcpu_id is 0.
+ */
+ ne_vcpus[i] = 0;
+ rc = ne_add_vcpu(enclave_fd, &ne_vcpus[i]);
+ if (rc < 0) {
+ printf("Error in add vcpu, iter %d\n", i);
+
+ goto release_enclave_fd;
+ }
+
+ printf("Added vCPU %d to the enclave\n", ne_vcpus[i]);
+ }
+
+ printf("Enclave vCPUs were added\n");
+
+ rc = ne_start_enclave_check_booted(enclave_fd);
+ if (rc < 0) {
+ printf("Error in the enclave start / image loading heartbeat logic [rc=%d]\n", rc);
+
+ goto release_enclave_fd;
+ }
+
+ printf("Entering sleep for %d seconds ...\n", NE_SLEEP_TIME);
+
+ sleep(NE_SLEEP_TIME);
+
+ close(enclave_fd);
+
+ ne_free_mem_regions(ne_user_mem_regions);
+
+ exit(EXIT_SUCCESS);
+
+release_enclave_fd:
+ close(enclave_fd);
+ ne_free_mem_regions(ne_user_mem_regions);
+
+ exit(EXIT_FAILURE);
+}
diff --git a/samples/pidfd/.gitignore b/samples/pidfd/.gitignore
index be52b3ba6e4b..d4cfa3176b1b 100644
--- a/samples/pidfd/.gitignore
+++ b/samples/pidfd/.gitignore
@@ -1 +1,2 @@
-pidfd-metadata
+# SPDX-License-Identifier: GPL-2.0-only
+/pidfd-metadata
diff --git a/samples/pidfd/Makefile b/samples/pidfd/Makefile
index ee2979849d92..9754e2d81f70 100644
--- a/samples/pidfd/Makefile
+++ b/samples/pidfd/Makefile
@@ -1,6 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
+usertprogs-always-y += pidfd-metadata
-hostprogs := pidfd-metadata
-always-y := $(hostprogs)
-HOSTCFLAGS_pidfd-metadata.o += -I$(objtree)/usr/include
-all: pidfd-metadata
+userccflags += -I usr/include
diff --git a/samples/pktgen/README.rst b/samples/pktgen/README.rst
index 3f6483e8b2df..f4adeed5f5f0 100644
--- a/samples/pktgen/README.rst
+++ b/samples/pktgen/README.rst
@@ -3,7 +3,7 @@ Sample and benchmark scripts for pktgen (packet generator)
This directory contains some pktgen sample and benchmark scripts, that
can easily be copied and adjusted for your own use-case.
-General doc is located in kernel: Documentation/networking/pktgen.txt
+General doc is located in kernel: Documentation/networking/pktgen.rst
Helper include files
====================
@@ -28,10 +28,28 @@ across the sample scripts. Usage example is printed on errors::
-b : ($BURST) HW level bursting of SKBs
-v : ($VERBOSE) verbose
-x : ($DEBUG) debug
+ -6 : ($IP6) IPv6
+ -w : ($DELAY) Tx Delay value (ns)
+ -a : ($APPEND) Script will not reset generator's state, but will append its config
The global variable being set is also listed. E.g. the required
interface/device parameter "-i" sets variable $DEV.
+"-a" parameter may be used to create different flows simultaneously.
+In this mode script will keep the existing config, will append its settings.
+In this mode you'll have to manually run traffic with "pg_ctrl start".
+
+For example you may use:
+
+ source ./samples/pktgen/functions.sh
+ pg_ctrl reset
+ # add first device
+ ./pktgen_sample06_numa_awared_queue_irq_affinity.sh -a -i ens1f0 -m 34:80:0d:a3:fc:c9 -t 8
+ # add second device
+ ./pktgen_sample06_numa_awared_queue_irq_affinity.sh -a -i ens1f1 -m 34:80:0d:a3:fc:c9 -t 8
+ # run joint traffic on two devs
+ pg_ctrl start
+
Common functions
----------------
The functions.sh file provides; Three different shell functions for
diff --git a/samples/pktgen/functions.sh b/samples/pktgen/functions.sh
index dae06d5b38fa..933194257a24 100644
--- a/samples/pktgen/functions.sh
+++ b/samples/pktgen/functions.sh
@@ -108,7 +108,12 @@ function pgset() {
fi
}
-[[ $EUID -eq 0 ]] && trap 'pg_ctrl "reset"' EXIT
+if [[ -z "$APPEND" ]]; then
+ if [[ $EUID -eq 0 ]]; then
+ # Cleanup pktgen setup on exit if thats not "append mode"
+ trap 'pg_ctrl "reset"' EXIT
+ fi
+fi
## -- General shell tricks --
@@ -118,7 +123,7 @@ function root_check_run_with_sudo() {
if [ "$EUID" -ne 0 ]; then
if [ -x $0 ]; then # Directly executable use sudo
info "Not root, running with sudo"
- sudo "$0" "$@"
+ sudo -E "$0" "$@"
exit $?
fi
err 4 "cannot perform sudo run of $0"
diff --git a/samples/pktgen/parameters.sh b/samples/pktgen/parameters.sh
index ff0ed474fee9..81906f199454 100644
--- a/samples/pktgen/parameters.sh
+++ b/samples/pktgen/parameters.sh
@@ -11,6 +11,7 @@ function usage() {
echo " -d : (\$DEST_IP) destination IP. CIDR (e.g. 198.18.0.0/15) is also allowed"
echo " -m : (\$DST_MAC) destination MAC-addr"
echo " -p : (\$DST_PORT) destination PORT range (e.g. 433-444) is also allowed"
+ echo " -k : (\$UDP_CSUM) enable UDP tx checksum"
echo " -t : (\$THREADS) threads to start"
echo " -f : (\$F_THREAD) index of first thread (zero indexed CPU number)"
echo " -c : (\$SKB_CLONE) SKB clones send before alloc new SKB"
@@ -19,12 +20,14 @@ function usage() {
echo " -v : (\$VERBOSE) verbose"
echo " -x : (\$DEBUG) debug"
echo " -6 : (\$IP6) IPv6"
+ echo " -w : (\$DELAY) Tx Delay value (ns)"
+ echo " -a : (\$APPEND) Script will not reset generator's state, but will append its config"
echo ""
}
## --- Parse command line arguments / parameters ---
## echo "Commandline options:"
-while getopts "s:i:d:m:p:f:t:c:n:b:vxh6" option; do
+while getopts "s:i:d:m:p:f:t:c:n:b:w:vxh6ak" option; do
case $option in
i) # interface
export DEV=$OPTARG
@@ -66,6 +69,10 @@ while getopts "s:i:d:m:p:f:t:c:n:b:vxh6" option; do
export BURST=$OPTARG
info "SKB bursting: BURST=$BURST"
;;
+ w)
+ export DELAY=$OPTARG
+ info "DELAY=$DELAY"
+ ;;
v)
export VERBOSE=yes
info "Verbose mode: VERBOSE=$VERBOSE"
@@ -78,6 +85,14 @@ while getopts "s:i:d:m:p:f:t:c:n:b:vxh6" option; do
export IP6=6
info "IP6: IP6=$IP6"
;;
+ a)
+ export APPEND=yes
+ info "Append mode: APPEND=$APPEND"
+ ;;
+ k)
+ export UDP_CSUM=yes
+ info "UDP tx checksum: UDP_CSUM=$UDP_CSUM"
+ ;;
h|?|*)
usage;
err 2 "[ERROR] Unknown parameters!!!"
@@ -100,6 +115,9 @@ if [ -z "$THREADS" ]; then
export THREADS=1
fi
+# default DELAY
+[ -z "$DELAY" ] && export DELAY=0 # Zero means max speed
+
export L_THREAD=$(( THREADS + F_THREAD - 1 ))
if [ -z "$DEV" ]; then
diff --git a/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh b/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
index 1b6204125d2d..99ec0688b044 100755
--- a/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
+++ b/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
@@ -50,9 +50,6 @@ if [ -n "$DST_PORT" ]; then
validate_ports $UDP_DST_MIN $UDP_DST_MAX
fi
-# Base Config
-DELAY="0" # Zero means max speed
-
# General cleanup everything since last run
pg_ctrl "reset"
@@ -92,14 +89,21 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
pg_set $dev "burst $BURST"
done
+# Run if user hits control-c
+function print_result() {
+ # Print results
+ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
+ dev=${DEV}@${thread}
+ echo "Device: $dev"
+ cat /proc/net/pktgen/$dev | grep -A2 "Result:"
+ done
+}
+# trap keyboard interrupt (Ctrl-C)
+trap true SIGINT
+
# start_run
echo "Running... ctrl^C to stop" >&2
pg_ctrl "start"
echo "Done" >&2
-# Print results
-for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
- dev=${DEV}@${thread}
- echo "Device: $dev"
- cat /proc/net/pktgen/$dev | grep -A2 "Result:"
-done
+print_result
diff --git a/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh b/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh
index e607cb369b20..04b0dd0c36d6 100755
--- a/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh
+++ b/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh
@@ -33,9 +33,6 @@ if [ -n "$DST_PORT" ]; then
validate_ports $UDP_DST_MIN $UDP_DST_MAX
fi
-# Base Config
-DELAY="0" # Zero means max speed
-
# General cleanup everything since last run
pg_ctrl "reset"
@@ -72,14 +69,21 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
pg_set $dev "xmit_mode queue_xmit"
done
+# Run if user hits control-c
+function print_result {
+ # Print results
+ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
+ dev=${DEV}@${thread}
+ echo "Device: $dev"
+ cat /proc/net/pktgen/$dev | grep -A2 "Result:"
+ done
+}
+# trap keyboard interrupt (Ctrl-C)
+trap true SIGINT
+
# start_run
echo "Running... ctrl^C to stop" >&2
pg_ctrl "start"
echo "Done" >&2
-# Print results
-for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
- dev=${DEV}@${thread}
- echo "Device: $dev"
- cat /proc/net/pktgen/$dev | grep -A2 "Result:"
-done
+print_result
diff --git a/samples/pktgen/pktgen_sample01_simple.sh b/samples/pktgen/pktgen_sample01_simple.sh
index a4e250b45dce..09a92ea963f9 100755
--- a/samples/pktgen/pktgen_sample01_simple.sh
+++ b/samples/pktgen/pktgen_sample01_simple.sh
@@ -31,20 +31,17 @@ if [ -n "$DST_PORT" ]; then
validate_ports $UDP_DST_MIN $UDP_DST_MAX
fi
-# Base Config
-DELAY="0" # Zero means max speed
-
# Flow variation random source port between min and max
UDP_SRC_MIN=9
UDP_SRC_MAX=109
# General cleanup everything since last run
# (especially important if other threads were configured by other scripts)
-pg_ctrl "reset"
+[ -z "$APPEND" ] && pg_ctrl "reset"
# Add remove all other devices and add_device $DEV to thread 0
thread=0
-pg_thread $thread "rem_device_all"
+[ -z "$APPEND" ] && pg_thread $thread "rem_device_all"
pg_thread $thread "add_device" $DEV
# How many packets to send (zero means indefinitely)
@@ -75,16 +72,29 @@ if [ -n "$DST_PORT" ]; then
pg_set $DEV "udp_dst_max $UDP_DST_MAX"
fi
+[ ! -z "$UDP_CSUM" ] && pg_set $dev "flag UDPCSUM"
+
# Setup random UDP port src range
pg_set $DEV "flag UDPSRC_RND"
pg_set $DEV "udp_src_min $UDP_SRC_MIN"
pg_set $DEV "udp_src_max $UDP_SRC_MAX"
-# start_run
-echo "Running... ctrl^C to stop" >&2
-pg_ctrl "start"
-echo "Done" >&2
+# Run if user hits control-c
+function print_result() {
+ # Print results
+ echo "Result device: $DEV"
+ cat /proc/net/pktgen/$DEV
+}
+# trap keyboard interrupt (Ctrl-C)
+trap true SIGINT
+
+if [ -z "$APPEND" ]; then
+ # start_run
+ echo "Running... ctrl^C to stop" >&2
+ pg_ctrl "start"
+ echo "Done" >&2
-# Print results
-echo "Result device: $DEV"
-cat /proc/net/pktgen/$DEV
+ print_result
+else
+ echo "Append mode: config done. Do more or use 'pg_ctrl start' to run"
+fi \ No newline at end of file
diff --git a/samples/pktgen/pktgen_sample02_multiqueue.sh b/samples/pktgen/pktgen_sample02_multiqueue.sh
index cb2495fcdc60..7fa41c84c32f 100755
--- a/samples/pktgen/pktgen_sample02_multiqueue.sh
+++ b/samples/pktgen/pktgen_sample02_multiqueue.sh
@@ -17,7 +17,6 @@ source ${basedir}/parameters.sh
[ -z "$COUNT" ] && COUNT="100000" # Zero means indefinitely
# Base Config
-DELAY="0" # Zero means max speed
[ -z "$CLONE_SKB" ] && CLONE_SKB="0"
# Flow variation random source port between min and max
@@ -39,7 +38,7 @@ if [ -n "$DST_PORT" ]; then
fi
# General cleanup everything since last run
-pg_ctrl "reset"
+[ -z "$APPEND" ] && pg_ctrl "reset"
# Threads are specified with parameter -t value in $THREADS
for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
@@ -48,7 +47,7 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
dev=${DEV}@${thread}
# Add remove all other devices and add_device $dev to thread
- pg_thread $thread "rem_device_all"
+ [ -z "$APPEND" ] && pg_thread $thread "rem_device_all"
pg_thread $thread "add_device" $dev
# Notice config queue to map to cpu (mirrors smp_processor_id())
@@ -76,20 +75,33 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
pg_set $dev "udp_dst_max $UDP_DST_MAX"
fi
+ [ ! -z "$UDP_CSUM" ] && pg_set $dev "flag UDPCSUM"
+
# Setup random UDP port src range
pg_set $dev "flag UDPSRC_RND"
pg_set $dev "udp_src_min $UDP_SRC_MIN"
pg_set $dev "udp_src_max $UDP_SRC_MAX"
done
-# start_run
-echo "Running... ctrl^C to stop" >&2
-pg_ctrl "start"
-echo "Done" >&2
+# Run if user hits control-c
+function print_result() {
+ # Print results
+ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
+ dev=${DEV}@${thread}
+ echo "Device: $dev"
+ cat /proc/net/pktgen/$dev | grep -A2 "Result:"
+ done
+}
+# trap keyboard interrupt (Ctrl-C)
+trap true SIGINT
-# Print results
-for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
- dev=${DEV}@${thread}
- echo "Device: $dev"
- cat /proc/net/pktgen/$dev | grep -A2 "Result:"
-done
+if [ -z "$APPEND" ]; then
+ # start_run
+ echo "Running... ctrl^C to stop" >&2
+ pg_ctrl "start"
+ echo "Done" >&2
+
+ print_result
+else
+ echo "Append mode: config done. Do more or use 'pg_ctrl start' to run"
+fi
diff --git a/samples/pktgen/pktgen_sample03_burst_single_flow.sh b/samples/pktgen/pktgen_sample03_burst_single_flow.sh
index fff50765a5aa..8bf2fdffba16 100755
--- a/samples/pktgen/pktgen_sample03_burst_single_flow.sh
+++ b/samples/pktgen/pktgen_sample03_burst_single_flow.sh
@@ -42,18 +42,15 @@ if [ -n "$DST_PORT" ]; then
validate_ports $UDP_DST_MIN $UDP_DST_MAX
fi
-# Base Config
-DELAY="0" # Zero means max speed
-
# General cleanup everything since last run
-pg_ctrl "reset"
+[ -z "$APPEND" ] && pg_ctrl "reset"
# Threads are specified with parameter -t value in $THREADS
for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
dev=${DEV}@${thread}
# Add remove all other devices and add_device $dev to thread
- pg_thread $thread "rem_device_all"
+ [ -z "$APPEND" ] && pg_thread $thread "rem_device_all"
pg_thread $thread "add_device" $dev
# Base config
@@ -76,6 +73,8 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
pg_set $dev "udp_dst_max $UDP_DST_MAX"
fi
+ [ ! -z "$UDP_CSUM" ] && pg_set $dev "flag UDPCSUM"
+
# Setup burst, for easy testing -b 0 disable bursting
# (internally in pktgen default and minimum burst=1)
if [[ ${BURST} -ne 0 ]]; then
@@ -86,7 +85,7 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
done
# Run if user hits control-c
-function control_c() {
+function print_result() {
# Print results
for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
dev=${DEV}@${thread}
@@ -95,7 +94,13 @@ function control_c() {
done
}
# trap keyboard interrupt (Ctrl-C)
-trap control_c SIGINT
+trap true SIGINT
-echo "Running... ctrl^C to stop" >&2
-pg_ctrl "start"
+if [ -z "$APPEND" ]; then
+ echo "Running... ctrl^C to stop" >&2
+ pg_ctrl "start"
+
+ print_result
+else
+ echo "Append mode: config done. Do more or use 'pg_ctrl start' to run"
+fi
diff --git a/samples/pktgen/pktgen_sample04_many_flows.sh b/samples/pktgen/pktgen_sample04_many_flows.sh
index 2cd6b701400d..cff51f861506 100755
--- a/samples/pktgen/pktgen_sample04_many_flows.sh
+++ b/samples/pktgen/pktgen_sample04_many_flows.sh
@@ -13,13 +13,15 @@ root_check_run_with_sudo "$@"
# Parameter parsing via include
source ${basedir}/parameters.sh
# Set some default params, if they didn't get set
-[ -z "$DEST_IP" ] && DEST_IP="198.18.0.42"
+if [ -z "$DEST_IP" ]; then
+ [ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1"
+fi
[ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff"
[ -z "$CLONE_SKB" ] && CLONE_SKB="0"
[ -z "$COUNT" ] && COUNT="0" # Zero means indefinitely
if [ -n "$DEST_IP" ]; then
- validate_addr $DEST_IP
- read -r DST_MIN DST_MAX <<< $(parse_addr $DEST_IP)
+ validate_addr${IP6} $DEST_IP
+ read -r DST_MIN DST_MAX <<< $(parse_addr${IP6} $DEST_IP)
fi
if [ -n "$DST_PORT" ]; then
read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT)
@@ -34,9 +36,6 @@ fi
[ -z "$FLOWS" ] && FLOWS="8000"
[ -z "$FLOWLEN" ] && FLOWLEN="10"
-# Base Config
-DELAY="0" # Zero means max speed
-
if [[ -n "$BURST" ]]; then
err 1 "Bursting not supported for this mode"
fi
@@ -45,14 +44,14 @@ fi
read -r SRC_MIN SRC_MAX <<< $(parse_addr 198.18.0.0/15)
# General cleanup everything since last run
-pg_ctrl "reset"
+[ -z "$APPEND" ] && pg_ctrl "reset"
# Threads are specified with parameter -t value in $THREADS
for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
dev=${DEV}@${thread}
# Add remove all other devices and add_device $dev to thread
- pg_thread $thread "rem_device_all"
+ [ -z "$APPEND" ] && pg_thread $thread "rem_device_all"
pg_thread $thread "add_device" $dev
# Base config
@@ -65,8 +64,8 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
# Single destination
pg_set $dev "dst_mac $DST_MAC"
- pg_set $dev "dst_min $DST_MIN"
- pg_set $dev "dst_max $DST_MAX"
+ pg_set $dev "dst${IP6}_min $DST_MIN"
+ pg_set $dev "dst${IP6}_max $DST_MAX"
if [ -n "$DST_PORT" ]; then
# Single destination port or random port range
@@ -75,6 +74,8 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
pg_set $dev "udp_dst_max $UDP_DST_MAX"
fi
+ [ ! -z "$UDP_CSUM" ] && pg_set $dev "flag UDPCSUM"
+
# Randomize source IP-addresses
pg_set $dev "flag IPSRC_RND"
pg_set $dev "src_min $SRC_MIN"
@@ -107,7 +108,11 @@ function print_result() {
# trap keyboard interrupt (Ctrl-C)
trap true SIGINT
-echo "Running... ctrl^C to stop" >&2
-pg_ctrl "start"
+if [ -z "$APPEND" ]; then
+ echo "Running... ctrl^C to stop" >&2
+ pg_ctrl "start"
-print_result
+ print_result
+else
+ echo "Append mode: config done. Do more or use 'pg_ctrl start' to run"
+fi
diff --git a/samples/pktgen/pktgen_sample05_flow_per_thread.sh b/samples/pktgen/pktgen_sample05_flow_per_thread.sh
index 4cb6252ade39..3578d0aa4ac5 100755
--- a/samples/pktgen/pktgen_sample05_flow_per_thread.sh
+++ b/samples/pktgen/pktgen_sample05_flow_per_thread.sh
@@ -17,32 +17,31 @@ root_check_run_with_sudo "$@"
# Parameter parsing via include
source ${basedir}/parameters.sh
# Set some default params, if they didn't get set
-[ -z "$DEST_IP" ] && DEST_IP="198.18.0.42"
+if [ -z "$DEST_IP" ]; then
+ [ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1"
+fi
[ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff"
[ -z "$CLONE_SKB" ] && CLONE_SKB="0"
[ -z "$BURST" ] && BURST=32
[ -z "$COUNT" ] && COUNT="0" # Zero means indefinitely
if [ -n "$DEST_IP" ]; then
- validate_addr $DEST_IP
- read -r DST_MIN DST_MAX <<< $(parse_addr $DEST_IP)
+ validate_addr${IP6} $DEST_IP
+ read -r DST_MIN DST_MAX <<< $(parse_addr${IP6} $DEST_IP)
fi
if [ -n "$DST_PORT" ]; then
read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT)
validate_ports $UDP_DST_MIN $UDP_DST_MAX
fi
-# Base Config
-DELAY="0" # Zero means max speed
-
# General cleanup everything since last run
-pg_ctrl "reset"
+[ -z "$APPEND" ] && pg_ctrl "reset"
# Threads are specified with parameter -t value in $THREADS
for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
dev=${DEV}@${thread}
# Add remove all other devices and add_device $dev to thread
- pg_thread $thread "rem_device_all"
+ [ -z "$APPEND" ] && pg_thread $thread "rem_device_all"
pg_thread $thread "add_device" $dev
# Base config
@@ -55,8 +54,8 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
# Single destination
pg_set $dev "dst_mac $DST_MAC"
- pg_set $dev "dst_min $DST_MIN"
- pg_set $dev "dst_max $DST_MAX"
+ pg_set $dev "dst${IP6}_min $DST_MIN"
+ pg_set $dev "dst${IP6}_max $DST_MAX"
if [ -n "$DST_PORT" ]; then
# Single destination port or random port range
@@ -65,6 +64,8 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
pg_set $dev "udp_dst_max $UDP_DST_MAX"
fi
+ [ ! -z "$UDP_CSUM" ] && pg_set $dev "flag UDPCSUM"
+
# Setup source IP-addresses based on thread number
pg_set $dev "src_min 198.18.$((thread+1)).1"
pg_set $dev "src_max 198.18.$((thread+1)).1"
@@ -91,7 +92,11 @@ function print_result() {
# trap keyboard interrupt (Ctrl-C)
trap true SIGINT
-echo "Running... ctrl^C to stop" >&2
-pg_ctrl "start"
+if [ -z "$APPEND" ]; then
+ echo "Running... ctrl^C to stop" >&2
+ pg_ctrl "start"
-print_result
+ print_result
+else
+ echo "Append mode: config done. Do more or use 'pg_ctrl start' to run"
+fi
diff --git a/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh b/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh
index 728106060a02..264cc5db9c49 100755
--- a/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh
+++ b/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh
@@ -15,7 +15,6 @@ root_check_run_with_sudo "$@"
source ${basedir}/parameters.sh
# Base Config
-DELAY="0" # Zero means max speed
[ -z "$COUNT" ] && COUNT="20000000" # Zero means indefinitely
[ -z "$CLONE_SKB" ] && CLONE_SKB="0"
@@ -45,7 +44,7 @@ if [ -n "$DST_PORT" ]; then
fi
# General cleanup everything since last run
-pg_ctrl "reset"
+[ -z "$APPEND" ] && pg_ctrl "reset"
# Threads are specified with parameter -t value in $THREADS
for ((i = 0; i < $THREADS; i++)); do
@@ -59,7 +58,7 @@ for ((i = 0; i < $THREADS; i++)); do
info "irq ${irq_array[$i]} is set affinity to `cat /proc/irq/${irq_array[$i]}/smp_affinity_list`"
# Add remove all other devices and add_device $dev to thread
- pg_thread $thread "rem_device_all"
+ [ -z "$APPEND" ] && pg_thread $thread "rem_device_all"
pg_thread $thread "add_device" $dev
# select queue and bind the queue and $dev in 1:1 relationship
@@ -93,21 +92,34 @@ for ((i = 0; i < $THREADS; i++)); do
pg_set $dev "udp_dst_max $UDP_DST_MAX"
fi
+ [ ! -z "$UDP_CSUM" ] && pg_set $dev "flag UDPCSUM"
+
# Setup random UDP port src range
pg_set $dev "flag UDPSRC_RND"
pg_set $dev "udp_src_min $UDP_SRC_MIN"
pg_set $dev "udp_src_max $UDP_SRC_MAX"
done
-# start_run
-echo "Running... ctrl^C to stop" >&2
-pg_ctrl "start"
-echo "Done" >&2
+# Run if user hits control-c
+function print_result() {
+ # Print results
+ for ((i = 0; i < $THREADS; i++)); do
+ thread=${cpu_array[$((i+F_THREAD))]}
+ dev=${DEV}@${thread}
+ echo "Device: $dev"
+ cat /proc/net/pktgen/$dev | grep -A2 "Result:"
+ done
+}
+# trap keyboard interrupt (Ctrl-C)
+trap true SIGINT
-# Print results
-for ((i = 0; i < $THREADS; i++)); do
- thread=${cpu_array[$((i+F_THREAD))]}
- dev=${DEV}@${thread}
- echo "Device: $dev"
- cat /proc/net/pktgen/$dev | grep -A2 "Result:"
-done
+# start_run
+if [ -z "$APPEND" ]; then
+ echo "Running... ctrl^C to stop" >&2
+ pg_ctrl "start"
+ echo "Done" >&2
+
+ print_result
+else
+ echo "Append mode: config done. Do more or use 'pg_ctrl start' to run"
+fi
diff --git a/samples/qmi/qmi_sample_client.c b/samples/qmi/qmi_sample_client.c
index c9e7276c3d83..c045e3d24326 100644
--- a/samples/qmi/qmi_sample_client.c
+++ b/samples/qmi/qmi_sample_client.c
@@ -42,7 +42,7 @@ struct test_name_type_v01 {
char name[TEST_MAX_NAME_SIZE_V01];
};
-static struct qmi_elem_info test_name_type_v01_ei[] = {
+static const struct qmi_elem_info test_name_type_v01_ei[] = {
{
.data_type = QMI_DATA_LEN,
.elem_len = 1,
@@ -71,7 +71,7 @@ struct test_ping_req_msg_v01 {
struct test_name_type_v01 client_name;
};
-static struct qmi_elem_info test_ping_req_msg_v01_ei[] = {
+static const struct qmi_elem_info test_ping_req_msg_v01_ei[] = {
{
.data_type = QMI_UNSIGNED_1_BYTE,
.elem_len = 4,
@@ -113,7 +113,7 @@ struct test_ping_resp_msg_v01 {
struct test_name_type_v01 service_name;
};
-static struct qmi_elem_info test_ping_resp_msg_v01_ei[] = {
+static const struct qmi_elem_info test_ping_resp_msg_v01_ei[] = {
{
.data_type = QMI_STRUCT,
.elem_len = 1,
@@ -172,7 +172,7 @@ struct test_data_req_msg_v01 {
struct test_name_type_v01 client_name;
};
-static struct qmi_elem_info test_data_req_msg_v01_ei[] = {
+static const struct qmi_elem_info test_data_req_msg_v01_ei[] = {
{
.data_type = QMI_DATA_LEN,
.elem_len = 1,
@@ -224,7 +224,7 @@ struct test_data_resp_msg_v01 {
struct test_name_type_v01 service_name;
};
-static struct qmi_elem_info test_data_resp_msg_v01_ei[] = {
+static const struct qmi_elem_info test_data_resp_msg_v01_ei[] = {
{
.data_type = QMI_STRUCT,
.elem_len = 1,
@@ -429,7 +429,7 @@ static const struct file_operations data_fops = {
.write = data_write,
};
-static struct qmi_msg_handler qmi_sample_handlers[] = {
+static const struct qmi_msg_handler qmi_sample_handlers[] = {
{
.type = QMI_RESPONSE,
.msg_id = TEST_PING_REQ_MSG_ID_V01,
@@ -571,7 +571,7 @@ static void qmi_sample_del_server(struct qmi_handle *qmi,
static struct qmi_handle lookup_client;
-static struct qmi_ops lookup_ops = {
+static const struct qmi_ops lookup_ops = {
.new_server = qmi_sample_new_server,
.del_server = qmi_sample_del_server,
};
diff --git a/samples/rust/Kconfig b/samples/rust/Kconfig
new file mode 100644
index 000000000000..841e0906e943
--- /dev/null
+++ b/samples/rust/Kconfig
@@ -0,0 +1,30 @@
+# SPDX-License-Identifier: GPL-2.0
+
+menuconfig SAMPLES_RUST
+ bool "Rust samples"
+ depends on RUST
+ help
+ You can build sample Rust kernel code here.
+
+ If unsure, say N.
+
+if SAMPLES_RUST
+
+config SAMPLE_RUST_MINIMAL
+ tristate "Minimal"
+ help
+ This option builds the Rust minimal module sample.
+
+ To compile this as a module, choose M here:
+ the module will be called rust_minimal.
+
+ If unsure, say N.
+
+config SAMPLE_RUST_HOSTPROGS
+ bool "Host programs"
+ help
+ This option builds the Rust host program samples.
+
+ If unsure, say N.
+
+endif # SAMPLES_RUST
diff --git a/samples/rust/Makefile b/samples/rust/Makefile
new file mode 100644
index 000000000000..1daba5f8658a
--- /dev/null
+++ b/samples/rust/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_SAMPLE_RUST_MINIMAL) += rust_minimal.o
+
+subdir-$(CONFIG_SAMPLE_RUST_HOSTPROGS) += hostprogs
diff --git a/samples/rust/hostprogs/.gitignore b/samples/rust/hostprogs/.gitignore
new file mode 100644
index 000000000000..a6c173da5048
--- /dev/null
+++ b/samples/rust/hostprogs/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+
+single
diff --git a/samples/rust/hostprogs/Makefile b/samples/rust/hostprogs/Makefile
new file mode 100644
index 000000000000..8ddcbd7416db
--- /dev/null
+++ b/samples/rust/hostprogs/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+
+hostprogs-always-y := single
+
+single-rust := y
diff --git a/samples/rust/hostprogs/a.rs b/samples/rust/hostprogs/a.rs
new file mode 100644
index 000000000000..f7a4a3d0f4e0
--- /dev/null
+++ b/samples/rust/hostprogs/a.rs
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Rust single host program sample: module `a`.
+
+pub(crate) fn f(x: i32) {
+ println!("The number is {}.", x);
+}
diff --git a/samples/rust/hostprogs/b.rs b/samples/rust/hostprogs/b.rs
new file mode 100644
index 000000000000..c1675890648f
--- /dev/null
+++ b/samples/rust/hostprogs/b.rs
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Rust single host program sample: module `b`.
+
+pub(crate) const CONSTANT: i32 = 42;
diff --git a/samples/rust/hostprogs/single.rs b/samples/rust/hostprogs/single.rs
new file mode 100644
index 000000000000..8c48a119339a
--- /dev/null
+++ b/samples/rust/hostprogs/single.rs
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Rust single host program sample.
+
+mod a;
+mod b;
+
+fn main() {
+ println!("Hello world!");
+
+ a::f(b::CONSTANT);
+}
diff --git a/samples/rust/rust_minimal.rs b/samples/rust/rust_minimal.rs
new file mode 100644
index 000000000000..54ad17685742
--- /dev/null
+++ b/samples/rust/rust_minimal.rs
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Rust minimal sample.
+
+use kernel::prelude::*;
+
+module! {
+ type: RustMinimal,
+ name: b"rust_minimal",
+ author: b"Rust for Linux Contributors",
+ description: b"Rust minimal sample",
+ license: b"GPL",
+}
+
+struct RustMinimal {
+ numbers: Vec<i32>,
+}
+
+impl kernel::Module for RustMinimal {
+ fn init(_module: &'static ThisModule) -> Result<Self> {
+ pr_info!("Rust minimal sample (init)\n");
+ pr_info!("Am I built-in? {}\n", !cfg!(MODULE));
+
+ let mut numbers = Vec::new();
+ numbers.try_push(72)?;
+ numbers.try_push(108)?;
+ numbers.try_push(200)?;
+
+ Ok(RustMinimal { numbers })
+ }
+}
+
+impl Drop for RustMinimal {
+ fn drop(&mut self) {
+ pr_info!("My numbers are {:?}\n", self.numbers);
+ pr_info!("Rust minimal sample (exit)\n");
+ }
+}
diff --git a/samples/seccomp/.gitignore b/samples/seccomp/.gitignore
index d1e2e817d556..a6df0da77c5d 100644
--- a/samples/seccomp/.gitignore
+++ b/samples/seccomp/.gitignore
@@ -1,4 +1,5 @@
-bpf-direct
-bpf-fancy
-dropper
-user-trap
+# SPDX-License-Identifier: GPL-2.0-only
+/bpf-direct
+/bpf-fancy
+/dropper
+/user-trap
diff --git a/samples/seccomp/Makefile b/samples/seccomp/Makefile
index 89279e8b87df..c85ae0ed8342 100644
--- a/samples/seccomp/Makefile
+++ b/samples/seccomp/Makefile
@@ -1,44 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-ifndef CROSS_COMPILE
-hostprogs := bpf-fancy dropper bpf-direct user-trap
+userprogs-always-y += bpf-fancy dropper bpf-direct user-trap
-HOSTCFLAGS_bpf-fancy.o += -I$(objtree)/usr/include
-HOSTCFLAGS_bpf-fancy.o += -idirafter $(objtree)/include
-HOSTCFLAGS_bpf-helper.o += -I$(objtree)/usr/include
-HOSTCFLAGS_bpf-helper.o += -idirafter $(objtree)/include
bpf-fancy-objs := bpf-fancy.o bpf-helper.o
-HOSTCFLAGS_dropper.o += -I$(objtree)/usr/include
-HOSTCFLAGS_dropper.o += -idirafter $(objtree)/include
-dropper-objs := dropper.o
-
-HOSTCFLAGS_bpf-direct.o += -I$(objtree)/usr/include
-HOSTCFLAGS_bpf-direct.o += -idirafter $(objtree)/include
-bpf-direct-objs := bpf-direct.o
-
-HOSTCFLAGS_user-trap.o += -I$(objtree)/usr/include
-HOSTCFLAGS_user-trap.o += -idirafter $(objtree)/include
-user-trap-objs := user-trap.o
-
-# Try to match the kernel target.
-ifndef CONFIG_64BIT
-
-# s390 has -m31 flag to build 31 bit binaries
-ifndef CONFIG_S390
-MFLAG = -m32
-else
-MFLAG = -m31
-endif
-
-HOSTCFLAGS_bpf-direct.o += $(MFLAG)
-HOSTCFLAGS_dropper.o += $(MFLAG)
-HOSTCFLAGS_bpf-helper.o += $(MFLAG)
-HOSTCFLAGS_bpf-fancy.o += $(MFLAG)
-HOSTCFLAGS_user-trap.o += $(MFLAG)
-HOSTLDLIBS_bpf-direct += $(MFLAG)
-HOSTLDLIBS_bpf-fancy += $(MFLAG)
-HOSTLDLIBS_dropper += $(MFLAG)
-HOSTLDLIBS_user-trap += $(MFLAG)
-endif
-always-y := $(hostprogs)
-endif
+userccflags += -I usr/include
diff --git a/samples/seccomp/bpf-helper.h b/samples/seccomp/bpf-helper.h
index 0cc9816fe8e8..417e48a4c4df 100644
--- a/samples/seccomp/bpf-helper.h
+++ b/samples/seccomp/bpf-helper.h
@@ -62,9 +62,9 @@ void seccomp_bpf_print(struct sock_filter *filter, size_t count);
#define EXPAND(...) __VA_ARGS__
/* Ensure that we load the logically correct offset. */
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define LO_ARG(idx) offsetof(struct seccomp_data, args[(idx)])
-#elif __BYTE_ORDER == __BIG_ENDIAN
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define LO_ARG(idx) offsetof(struct seccomp_data, args[(idx)]) + sizeof(__u32)
#else
#error "Unknown endianness"
@@ -85,10 +85,10 @@ void seccomp_bpf_print(struct sock_filter *filter, size_t count);
#elif __BITS_PER_LONG == 64
/* Ensure that we load the logically correct offset. */
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define ENDIAN(_lo, _hi) _lo, _hi
#define HI_ARG(idx) offsetof(struct seccomp_data, args[(idx)]) + sizeof(__u32)
-#elif __BYTE_ORDER == __BIG_ENDIAN
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define ENDIAN(_lo, _hi) _hi, _lo
#define HI_ARG(idx) offsetof(struct seccomp_data, args[(idx)])
#endif
diff --git a/samples/seccomp/dropper.c b/samples/seccomp/dropper.c
index cc0648eb389e..4bca4b70f665 100644
--- a/samples/seccomp/dropper.c
+++ b/samples/seccomp/dropper.c
@@ -25,7 +25,7 @@
#include <sys/prctl.h>
#include <unistd.h>
-static int install_filter(int nr, int arch, int error)
+static int install_filter(int arch, int nr, int error)
{
struct sock_filter filter[] = {
BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
@@ -42,6 +42,10 @@ static int install_filter(int nr, int arch, int error)
.len = (unsigned short)(sizeof(filter)/sizeof(filter[0])),
.filter = filter,
};
+ if (error == -1) {
+ struct sock_filter kill = BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL);
+ filter[4] = kill;
+ }
if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
perror("prctl(NO_NEW_PRIVS)");
return 1;
@@ -57,9 +61,10 @@ int main(int argc, char **argv)
{
if (argc < 5) {
fprintf(stderr, "Usage:\n"
- "dropper <syscall_nr> <arch> <errno> <prog> [<args>]\n"
+ "dropper <arch> <syscall_nr> <errno> <prog> [<args>]\n"
"Hint: AUDIT_ARCH_I386: 0x%X\n"
" AUDIT_ARCH_X86_64: 0x%X\n"
+ " errno == -1 means SECCOMP_RET_KILL\n"
"\n", AUDIT_ARCH_I386, AUDIT_ARCH_X86_64);
return 1;
}
diff --git a/samples/timers/.gitignore b/samples/timers/.gitignore
index c5c45d7ec0df..cd9ff7b95383 100644
--- a/samples/timers/.gitignore
+++ b/samples/timers/.gitignore
@@ -1 +1,2 @@
-hpet_example
+# SPDX-License-Identifier: GPL-2.0-only
+/hpet_example
diff --git a/samples/timers/Makefile b/samples/timers/Makefile
index f9fa07460802..e6836cdea4e2 100644
--- a/samples/timers/Makefile
+++ b/samples/timers/Makefile
@@ -1,16 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
-ifndef CROSS_COMPILE
-uname_M := $(shell uname -m 2>/dev/null || echo not)
-ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+userprogs-always-y += hpet_example
-ifeq ($(ARCH),x86)
-CC := $(CROSS_COMPILE)gcc
-PROGS := hpet_example
-
-all: $(PROGS)
-
-clean:
- rm -fr $(PROGS)
-
-endif
-endif
+userccflags += -I usr/include
diff --git a/samples/trace_events/Makefile b/samples/trace_events/Makefile
index b78344e7bbed..b3808bb4cf8b 100644
--- a/samples/trace_events/Makefile
+++ b/samples/trace_events/Makefile
@@ -11,5 +11,7 @@
# Here trace-events-sample.c does the CREATE_TRACE_POINTS.
#
CFLAGS_trace-events-sample.o := -I$(src)
+CFLAGS_trace_custom_sched.o := -I$(src)
obj-$(CONFIG_SAMPLE_TRACE_EVENTS) += trace-events-sample.o
+obj-$(CONFIG_SAMPLE_TRACE_CUSTOM_EVENTS) += trace_custom_sched.o
diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c
index 1a72b7d95cdc..608c4ae3b08a 100644
--- a/samples/trace_events/trace-events-sample.c
+++ b/samples/trace_events/trace-events-sample.c
@@ -19,8 +19,10 @@ static const char *random_strings[] = {
"One ring to rule them all"
};
-static void simple_thread_func(int cnt)
+static void do_simple_thread_func(int cnt, const char *fmt, ...)
{
+ unsigned long bitmask[1] = {0xdeadbeefUL};
+ va_list va;
int array[6];
int len = cnt % 5;
int i;
@@ -32,9 +34,13 @@ static void simple_thread_func(int cnt)
array[i] = i + 1;
array[i] = 0;
+ va_start(va, fmt);
+
/* Silly tracepoints */
trace_foo_bar("hello", cnt, array, random_strings[len],
- current->cpus_ptr);
+ current->cpus_ptr, fmt, &va);
+
+ va_end(va);
trace_foo_with_template_simple("HELLO", cnt);
@@ -43,6 +49,13 @@ static void simple_thread_func(int cnt)
trace_foo_with_template_cond("prints other times", cnt);
trace_foo_with_template_print("I have to be different", cnt);
+
+ trace_foo_rel_loc("Hello __rel_loc", cnt, bitmask);
+}
+
+static void simple_thread_func(int cnt)
+{
+ do_simple_thread_func(cnt, "iter=%d", cnt);
}
static int simple_thread(void *arg)
diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h
index 80b4a70315b6..1a92226202fc 100644
--- a/samples/trace_events/trace-events-sample.h
+++ b/samples/trace_events/trace-events-sample.h
@@ -141,6 +141,54 @@
* In most cases, the __assign_str() macro will take the same
* parameters as the __string() macro had to declare the string.
*
+ * __vstring: This is similar to __string() but instead of taking a
+ * dynamic length, it takes a variable list va_list 'va' variable.
+ * Some event callers already have a message from parameters saved
+ * in a va_list. Passing in the format and the va_list variable
+ * will save just enough on the ring buffer for that string.
+ * Note, the va variable used is a pointer to a va_list, not
+ * to the va_list directly.
+ *
+ * (va_list *va)
+ *
+ * __vstring(foo, fmt, va) is similar to: vsnprintf(foo, fmt, va)
+ *
+ * To assign the string, use the helper macro __assign_vstr().
+ *
+ * __assign_vstr(foo, fmt, va);
+ *
+ * In most cases, the __assign_vstr() macro will take the same
+ * parameters as the __vstring() macro had to declare the string.
+ * Use __get_str() to retrieve the __vstring() just like it would for
+ * __string().
+ *
+ * __string_len: This is a helper to a __dynamic_array, but it understands
+ * that the array has characters in it, and with the combined
+ * use of __assign_str_len(), it will allocate 'len' + 1 bytes
+ * in the ring buffer and add a '\0' to the string. This is
+ * useful if the string being saved has no terminating '\0' byte.
+ * It requires that the length of the string is known as it acts
+ * like a memcpy().
+ *
+ * Declared with:
+ *
+ * __string_len(foo, bar, len)
+ *
+ * To assign this string, use the helper macro __assign_str_len().
+ *
+ * __assign_str_len(foo, bar, len);
+ *
+ * Then len + 1 is allocated to the ring buffer, and a nul terminating
+ * byte is added. This is similar to:
+ *
+ * memcpy(__get_str(foo), bar, len);
+ * __get_str(foo)[len] = 0;
+ *
+ * The advantage of using this over __dynamic_array, is that it
+ * takes care of allocating the extra byte on the ring buffer
+ * for the '\0' terminating byte, and __get_str(foo) can be used
+ * in the TP_printk().
+ *
* __bitmask: This is another kind of __dynamic_array, but it expects
* an array of longs, and the number of bits to parse. It takes
* two parameters (name, nr_bits), where name is the name of the
@@ -229,9 +277,10 @@ TRACE_DEFINE_ENUM(TRACE_SAMPLE_ZOO);
TRACE_EVENT(foo_bar,
TP_PROTO(const char *foo, int bar, const int *lst,
- const char *string, const struct cpumask *mask),
+ const char *string, const struct cpumask *mask,
+ const char *fmt, va_list *va),
- TP_ARGS(foo, bar, lst, string, mask),
+ TP_ARGS(foo, bar, lst, string, mask, fmt, va),
TP_STRUCT__entry(
__array( char, foo, 10 )
@@ -239,6 +288,7 @@ TRACE_EVENT(foo_bar,
__dynamic_array(int, list, __length_of(lst))
__string( str, string )
__bitmask( cpus, num_possible_cpus() )
+ __vstring( vstr, fmt, va )
),
TP_fast_assign(
@@ -247,10 +297,11 @@ TRACE_EVENT(foo_bar,
memcpy(__get_dynamic_array(list), lst,
__length_of(lst) * sizeof(int));
__assign_str(str, string);
+ __assign_vstr(vstr, fmt, va);
__assign_bitmask(cpus, cpumask_bits(mask), num_possible_cpus());
),
- TP_printk("foo %s %d %s %s %s %s (%s)", __entry->foo, __entry->bar,
+ TP_printk("foo %s %d %s %s %s %s (%s) %s", __entry->foo, __entry->bar,
/*
* Notice here the use of some helper functions. This includes:
@@ -294,7 +345,7 @@ TRACE_EVENT(foo_bar,
__print_array(__get_dynamic_array(list),
__get_dynamic_array_len(list) / sizeof(int),
sizeof(int)),
- __get_str(str), __get_bitmask(cpus))
+ __get_str(str), __get_bitmask(cpus), __get_str(vstr))
);
/*
@@ -416,7 +467,7 @@ TRACE_EVENT_FN(foo_bar_with_fn,
* Note, TRACE_EVENT() itself is simply defined as:
*
* #define TRACE_EVENT(name, proto, args, tstruct, assign, printk) \
- * DEFINE_EVENT_CLASS(name, proto, args, tstruct, assign, printk); \
+ * DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, printk); \
* DEFINE_EVENT(name, name, proto, args)
*
* The DEFINE_EVENT() also can be declared with conditions and reg functions:
@@ -479,6 +530,39 @@ DEFINE_EVENT_PRINT(foo_template, foo_with_template_print,
TP_ARGS(foo, bar),
TP_printk("bar %s %d", __get_str(foo), __entry->bar));
+/*
+ * There are yet another __rel_loc dynamic data attribute. If you
+ * use __rel_dynamic_array() and __rel_string() etc. macros, you
+ * can use this attribute. There is no difference from the viewpoint
+ * of functionality with/without 'rel' but the encoding is a bit
+ * different. This is expected to be used with user-space event,
+ * there is no reason that the kernel event use this, but only for
+ * testing.
+ */
+
+TRACE_EVENT(foo_rel_loc,
+
+ TP_PROTO(const char *foo, int bar, unsigned long *mask),
+
+ TP_ARGS(foo, bar, mask),
+
+ TP_STRUCT__entry(
+ __rel_string( foo, foo )
+ __field( int, bar )
+ __rel_bitmask( bitmask,
+ BITS_PER_BYTE * sizeof(unsigned long) )
+ ),
+
+ TP_fast_assign(
+ __assign_rel_str(foo, foo);
+ __entry->bar = bar;
+ __assign_rel_bitmask(bitmask, mask,
+ BITS_PER_BYTE * sizeof(unsigned long));
+ ),
+
+ TP_printk("foo_rel_loc %s, %d, %s", __get_rel_str(foo), __entry->bar,
+ __get_rel_bitmask(bitmask))
+);
#endif
/***** NOTICE! The #if protection ends here. *****/
diff --git a/samples/trace_events/trace_custom_sched.c b/samples/trace_events/trace_custom_sched.c
new file mode 100644
index 000000000000..b99d9ab7db85
--- /dev/null
+++ b/samples/trace_events/trace_custom_sched.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * event tracer
+ *
+ * Copyright (C) 2022 Google Inc, Steven Rostedt <rostedt@goodmis.org>
+ */
+
+#define pr_fmt(fmt) fmt
+
+#include <linux/trace_events.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+
+/*
+ * Must include the event header that the custom event will attach to,
+ * from the C file, and not in the custom header file.
+ */
+#include <trace/events/sched.h>
+
+/* Declare CREATE_CUSTOM_TRACE_EVENTS before including custom header */
+#define CREATE_CUSTOM_TRACE_EVENTS
+
+#include "trace_custom_sched.h"
+
+/*
+ * As the trace events are not exported to modules, the use of
+ * for_each_kernel_tracepoint() is needed to find the trace event
+ * to attach to. The fct() function below, is a callback that
+ * will be called for every event.
+ *
+ * Helper functions are created by the TRACE_CUSTOM_EVENT() macro
+ * update the event. Those are of the form:
+ *
+ * trace_custom_event_<event>_update()
+ *
+ * Where <event> is the event to attach.
+ */
+static void fct(struct tracepoint *tp, void *priv)
+{
+ trace_custom_event_sched_switch_update(tp);
+ trace_custom_event_sched_waking_update(tp);
+}
+
+static int __init trace_sched_init(void)
+{
+ for_each_kernel_tracepoint(fct, NULL);
+ return 0;
+}
+
+static void __exit trace_sched_exit(void)
+{
+}
+
+module_init(trace_sched_init);
+module_exit(trace_sched_exit);
+
+MODULE_AUTHOR("Steven Rostedt");
+MODULE_DESCRIPTION("Custom scheduling events");
+MODULE_LICENSE("GPL");
diff --git a/samples/trace_events/trace_custom_sched.h b/samples/trace_events/trace_custom_sched.h
new file mode 100644
index 000000000000..951388334a3f
--- /dev/null
+++ b/samples/trace_events/trace_custom_sched.h
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Like the headers that use TRACE_EVENT(), the TRACE_CUSTOM_EVENT()
+ * needs a header that allows for multiple inclusions.
+ *
+ * Test for a unique name (here we have _TRACE_CUSTOM_SCHED_H),
+ * also allowing to continue if TRACE_CUSTOM_MULTI_READ is defined.
+ */
+#if !defined(_TRACE_CUSTOM_SCHED_H) || defined(TRACE_CUSTOM_MULTI_READ)
+#define _TRACE_CUSTOM_SCHED_H
+
+/* Include linux/trace_events.h for initial defines of TRACE_CUSTOM_EVENT() */
+#include <linux/trace_events.h>
+
+/*
+ * TRACE_CUSTOM_EVENT() is just like TRACE_EVENT(). The first parameter
+ * is the event name of an existing event where the TRACE_EVENT has been included
+ * in the C file before including this file.
+ */
+TRACE_CUSTOM_EVENT(sched_switch,
+
+ /*
+ * The TP_PROTO() and TP_ARGS must match the trace event
+ * that the custom event is using.
+ */
+ TP_PROTO(bool preempt,
+ struct task_struct *prev,
+ struct task_struct *next,
+ unsigned int prev_state),
+
+ TP_ARGS(preempt, prev, next, prev_state),
+
+ /*
+ * The next fields are where the customization happens.
+ * The TP_STRUCT__entry() defines what will be recorded
+ * in the ring buffer when the custom event triggers.
+ *
+ * The rest is just like the TRACE_EVENT() macro except that
+ * it uses the custom entry.
+ */
+ TP_STRUCT__entry(
+ __field( unsigned short, prev_prio )
+ __field( unsigned short, next_prio )
+ __field( pid_t, next_pid )
+ ),
+
+ TP_fast_assign(
+ __entry->prev_prio = prev->prio;
+ __entry->next_pid = next->pid;
+ __entry->next_prio = next->prio;
+ ),
+
+ TP_printk("prev_prio=%d next_pid=%d next_prio=%d",
+ __entry->prev_prio, __entry->next_pid, __entry->next_prio)
+)
+
+
+TRACE_CUSTOM_EVENT(sched_waking,
+
+ TP_PROTO(struct task_struct *p),
+
+ TP_ARGS(p),
+
+ TP_STRUCT__entry(
+ __field( pid_t, pid )
+ __field( unsigned short, prio )
+ ),
+
+ TP_fast_assign(
+ __entry->pid = p->pid;
+ __entry->prio = p->prio;
+ ),
+
+ TP_printk("pid=%d prio=%d", __entry->pid, __entry->prio)
+)
+#endif
+/*
+ * Just like the headers that create TRACE_EVENTs, the below must
+ * be outside the protection of the above #if block.
+ */
+
+/*
+ * It is required that the Makefile includes:
+ * CFLAGS_<c_file>.o := -I$(src)
+ */
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+
+/*
+ * It is requred that the TRACE_INCLUDE_FILE be the same
+ * as this file without the ".h".
+ */
+#define TRACE_INCLUDE_FILE trace_custom_sched
+#include <trace/define_custom_trace.h>
diff --git a/samples/uhid/.gitignore b/samples/uhid/.gitignore
new file mode 100644
index 000000000000..0e0a5a929f5d
--- /dev/null
+++ b/samples/uhid/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+/uhid-example
diff --git a/samples/uhid/Makefile b/samples/uhid/Makefile
index 5f44ea40d6d5..0aa424ec4899 100644
--- a/samples/uhid/Makefile
+++ b/samples/uhid/Makefile
@@ -1,8 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
-# List of programs to build
-hostprogs := uhid-example
+userprogs-always-y += uhid-example
-# Tell kbuild to always build the programs
-always-y := $(hostprogs)
-
-HOSTCFLAGS_uhid-example.o += -I$(objtree)/usr/include
+userccflags += -I usr/include
diff --git a/samples/uhid/uhid-example.c b/samples/uhid/uhid-example.c
index b72d645ce828..015cb06a241e 100644
--- a/samples/uhid/uhid-example.c
+++ b/samples/uhid/uhid-example.c
@@ -165,7 +165,7 @@ static int uhid_write(int fd, const struct uhid_event *ev)
fprintf(stderr, "Cannot write to uhid: %m\n");
return -errno;
} else if (ret != sizeof(*ev)) {
- fprintf(stderr, "Wrong size written to uhid: %ld != %lu\n",
+ fprintf(stderr, "Wrong size written to uhid: %zd != %zu\n",
ret, sizeof(ev));
return -EFAULT;
} else {
@@ -236,7 +236,7 @@ static int event(int fd)
fprintf(stderr, "Cannot read uhid-cdev: %m\n");
return -errno;
} else if (ret != sizeof(ev)) {
- fprintf(stderr, "Invalid size read from uhid-dev: %ld != %lu\n",
+ fprintf(stderr, "Invalid size read from uhid-dev: %zd != %zu\n",
ret, sizeof(ev));
return -EFAULT;
}
diff --git a/samples/user_events/Makefile b/samples/user_events/Makefile
new file mode 100644
index 000000000000..7252b589db57
--- /dev/null
+++ b/samples/user_events/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -Wl,-no-as-needed -Wall -I../../usr/include
+
+example: example.o
+example.o: example.c
diff --git a/samples/user_events/example.c b/samples/user_events/example.c
new file mode 100644
index 000000000000..d06dc24156ec
--- /dev/null
+++ b/samples/user_events/example.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, Microsoft Corporation.
+ *
+ * Authors:
+ * Beau Belgrave <beaub@linux.microsoft.com>
+ */
+
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <asm/bitsperlong.h>
+#include <endian.h>
+#include <linux/user_events.h>
+
+#if __BITS_PER_LONG == 64
+#define endian_swap(x) htole64(x)
+#else
+#define endian_swap(x) htole32(x)
+#endif
+
+/* Assumes debugfs is mounted */
+const char *data_file = "/sys/kernel/debug/tracing/user_events_data";
+const char *status_file = "/sys/kernel/debug/tracing/user_events_status";
+
+static int event_status(long **status)
+{
+ int fd = open(status_file, O_RDONLY);
+
+ *status = mmap(NULL, sysconf(_SC_PAGESIZE), PROT_READ,
+ MAP_SHARED, fd, 0);
+
+ close(fd);
+
+ if (*status == MAP_FAILED)
+ return -1;
+
+ return 0;
+}
+
+static int event_reg(int fd, const char *command, long *index, long *mask,
+ int *write)
+{
+ struct user_reg reg = {0};
+
+ reg.size = sizeof(reg);
+ reg.name_args = (__u64)command;
+
+ if (ioctl(fd, DIAG_IOCSREG, &reg) == -1)
+ return -1;
+
+ *index = reg.status_bit / __BITS_PER_LONG;
+ *mask = endian_swap(1L << (reg.status_bit % __BITS_PER_LONG));
+ *write = reg.write_index;
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ int data_fd, write;
+ long index, mask;
+ long *status_page;
+ struct iovec io[2];
+ __u32 count = 0;
+
+ if (event_status(&status_page) == -1)
+ return errno;
+
+ data_fd = open(data_file, O_RDWR);
+
+ if (event_reg(data_fd, "test u32 count", &index, &mask, &write) == -1)
+ return errno;
+
+ /* Setup iovec */
+ io[0].iov_base = &write;
+ io[0].iov_len = sizeof(write);
+ io[1].iov_base = &count;
+ io[1].iov_len = sizeof(count);
+
+ask:
+ printf("Press enter to check status...\n");
+ getchar();
+
+ /* Check if anyone is listening */
+ if (status_page[index] & mask) {
+ /* Yep, trace out our data */
+ writev(data_fd, (const struct iovec *)io, 2);
+
+ /* Increase the count */
+ count++;
+
+ printf("Something was attached, wrote data\n");
+ }
+
+ goto ask;
+
+ return 0;
+}
diff --git a/samples/v4l/v4l2-pci-skeleton.c b/samples/v4l/v4l2-pci-skeleton.c
index f6a551bd57ef..a61f94db18d9 100644
--- a/samples/v4l/v4l2-pci-skeleton.c
+++ b/samples/v4l/v4l2-pci-skeleton.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* This is a V4L2 PCI Skeleton Driver. It gives an initial skeleton source
* for use with other PCI drivers.
@@ -6,19 +7,6 @@
* input 0 and an HDMI connector as input 1.
*
* Copyright 2014 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
- *
- * This program is free software; you may redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include <linux/types.h>
@@ -766,7 +754,7 @@ static int skeleton_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
ret = pci_enable_device(pdev);
if (ret)
return ret;
- ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
if (ret) {
dev_err(&pdev->dev, "no suitable DMA available.\n");
goto disable_pci;
@@ -879,7 +867,7 @@ static int skeleton_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
vdev->tvnorms = SKEL_TVNORMS;
video_set_drvdata(vdev, skel);
- ret = video_register_device(vdev, VFL_TYPE_GRABBER, -1);
+ ret = video_register_device(vdev, VFL_TYPE_VIDEO, -1);
if (ret)
goto free_hdl;
diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c
index 3cc5e5921682..117a8d799f71 100644
--- a/samples/vfio-mdev/mbochs.c
+++ b/samples/vfio-mdev/mbochs.c
@@ -21,7 +21,6 @@
*/
#include <linux/init.h>
#include <linux/module.h>
-#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
@@ -100,36 +99,46 @@ MODULE_PARM_DESC(mem, "megabytes available to " MBOCHS_NAME " devices");
#define MBOCHS_TYPE_2 "medium"
#define MBOCHS_TYPE_3 "large"
-static const struct mbochs_type {
- const char *name;
+static struct mbochs_type {
+ struct mdev_type type;
u32 mbytes;
u32 max_x;
u32 max_y;
} mbochs_types[] = {
{
- .name = MBOCHS_CLASS_NAME "-" MBOCHS_TYPE_1,
+ .type.sysfs_name = MBOCHS_TYPE_1,
+ .type.pretty_name = MBOCHS_CLASS_NAME "-" MBOCHS_TYPE_1,
.mbytes = 4,
.max_x = 800,
.max_y = 600,
}, {
- .name = MBOCHS_CLASS_NAME "-" MBOCHS_TYPE_2,
+ .type.sysfs_name = MBOCHS_TYPE_2,
+ .type.pretty_name = MBOCHS_CLASS_NAME "-" MBOCHS_TYPE_2,
.mbytes = 16,
.max_x = 1920,
.max_y = 1440,
}, {
- .name = MBOCHS_CLASS_NAME "-" MBOCHS_TYPE_3,
+ .type.sysfs_name = MBOCHS_TYPE_3,
+ .type.pretty_name = MBOCHS_CLASS_NAME "-" MBOCHS_TYPE_3,
.mbytes = 64,
.max_x = 0,
.max_y = 0,
},
};
+static struct mdev_type *mbochs_mdev_types[] = {
+ &mbochs_types[0].type,
+ &mbochs_types[1].type,
+ &mbochs_types[2].type,
+};
static dev_t mbochs_devt;
static struct class *mbochs_class;
static struct cdev mbochs_cdev;
static struct device mbochs_dev;
-static int mbochs_used_mbytes;
+static struct mdev_parent mbochs_parent;
+static atomic_t mbochs_avail_mbytes;
+static const struct vfio_device_ops mbochs_dev_ops;
struct vfio_region_info_ext {
struct vfio_region_info base;
@@ -160,6 +169,7 @@ struct mbochs_dmabuf {
/* State of each mdev device */
struct mdev_state {
+ struct vfio_device vdev;
u8 *vconfig;
u64 bar_mask[3];
u32 memory_bar_mask;
@@ -205,16 +215,6 @@ static struct page *__mbochs_get_page(struct mdev_state *mdev_state,
static struct page *mbochs_get_page(struct mdev_state *mdev_state,
pgoff_t pgoff);
-static const struct mbochs_type *mbochs_find_type(struct kobject *kobj)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(mbochs_types); i++)
- if (strcmp(mbochs_types[i].name, kobj->name) == 0)
- return mbochs_types + i;
- return NULL;
-}
-
static void mbochs_create_config_space(struct mdev_state *mdev_state)
{
STORE_LE16((u16 *) &mdev_state->vconfig[PCI_VENDOR_ID],
@@ -435,11 +435,9 @@ static void handle_edid_blob(struct mdev_state *mdev_state, u16 offset,
memcpy(buf, mdev_state->edid_blob + offset, count);
}
-static ssize_t mdev_access(struct mdev_device *mdev, char *buf, size_t count,
- loff_t pos, bool is_write)
+static ssize_t mdev_access(struct mdev_state *mdev_state, char *buf,
+ size_t count, loff_t pos, bool is_write)
{
- struct mdev_state *mdev_state = mdev_get_drvdata(mdev);
- struct device *dev = mdev_dev(mdev);
struct page *pg;
loff_t poff;
char *map;
@@ -488,7 +486,7 @@ static ssize_t mdev_access(struct mdev_device *mdev, char *buf, size_t count,
put_page(pg);
} else {
- dev_dbg(dev, "%s: %s @0x%llx (unhandled)\n",
+ dev_dbg(mdev_state->vdev.dev, "%s: %s @0x%llx (unhandled)\n",
__func__, is_write ? "WR" : "RD", pos);
ret = -1;
goto accessfailed;
@@ -503,9 +501,8 @@ accessfailed:
return ret;
}
-static int mbochs_reset(struct mdev_device *mdev)
+static int mbochs_reset(struct mdev_state *mdev_state)
{
- struct mdev_state *mdev_state = mdev_get_drvdata(mdev);
u32 size64k = mdev_state->memsize / (64 * 1024);
int i;
@@ -516,24 +513,25 @@ static int mbochs_reset(struct mdev_device *mdev)
return 0;
}
-static int mbochs_create(struct kobject *kobj, struct mdev_device *mdev)
+static int mbochs_init_dev(struct vfio_device *vdev)
{
- const struct mbochs_type *type = mbochs_find_type(kobj);
- struct device *dev = mdev_dev(mdev);
- struct mdev_state *mdev_state;
-
- if (!type)
- type = &mbochs_types[0];
- if (type->mbytes + mbochs_used_mbytes > max_mbytes)
- return -ENOMEM;
-
- mdev_state = kzalloc(sizeof(struct mdev_state), GFP_KERNEL);
- if (mdev_state == NULL)
- return -ENOMEM;
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
+ struct mdev_device *mdev = to_mdev_device(vdev->dev);
+ struct mbochs_type *type =
+ container_of(mdev->type, struct mbochs_type, type);
+ int avail_mbytes = atomic_read(&mbochs_avail_mbytes);
+ int ret = -ENOMEM;
+
+ do {
+ if (avail_mbytes < type->mbytes)
+ return -ENOSPC;
+ } while (!atomic_try_cmpxchg(&mbochs_avail_mbytes, &avail_mbytes,
+ avail_mbytes - type->mbytes));
mdev_state->vconfig = kzalloc(MBOCHS_CONFIG_SPACE_SIZE, GFP_KERNEL);
- if (mdev_state->vconfig == NULL)
- goto err_mem;
+ if (!mdev_state->vconfig)
+ goto err_avail;
mdev_state->memsize = type->mbytes * 1024 * 1024;
mdev_state->pagecount = mdev_state->memsize >> PAGE_SHIFT;
@@ -541,14 +539,10 @@ static int mbochs_create(struct kobject *kobj, struct mdev_device *mdev)
sizeof(struct page *),
GFP_KERNEL);
if (!mdev_state->pages)
- goto err_mem;
-
- dev_info(dev, "%s: %s, %d MB, %ld pages\n", __func__,
- kobj->name, type->mbytes, mdev_state->pagecount);
+ goto err_vconfig;
mutex_init(&mdev_state->ops_lock);
mdev_state->mdev = mdev;
- mdev_set_drvdata(mdev, mdev_state);
INIT_LIST_HEAD(&mdev_state->dmabufs);
mdev_state->next_id = 1;
@@ -558,32 +552,64 @@ static int mbochs_create(struct kobject *kobj, struct mdev_device *mdev)
mdev_state->edid_regs.edid_offset = MBOCHS_EDID_BLOB_OFFSET;
mdev_state->edid_regs.edid_max_size = sizeof(mdev_state->edid_blob);
mbochs_create_config_space(mdev_state);
- mbochs_reset(mdev);
+ mbochs_reset(mdev_state);
- mbochs_used_mbytes += type->mbytes;
+ dev_info(vdev->dev, "%s: %s, %d MB, %ld pages\n", __func__,
+ type->type.pretty_name, type->mbytes, mdev_state->pagecount);
return 0;
-err_mem:
+err_vconfig:
kfree(mdev_state->vconfig);
- kfree(mdev_state);
- return -ENOMEM;
+err_avail:
+ atomic_add(type->mbytes, &mbochs_avail_mbytes);
+ return ret;
}
-static int mbochs_remove(struct mdev_device *mdev)
+static int mbochs_probe(struct mdev_device *mdev)
{
- struct mdev_state *mdev_state = mdev_get_drvdata(mdev);
+ struct mdev_state *mdev_state;
+ int ret = -ENOMEM;
+
+ mdev_state = vfio_alloc_device(mdev_state, vdev, &mdev->dev,
+ &mbochs_dev_ops);
+ if (IS_ERR(mdev_state))
+ return PTR_ERR(mdev_state);
- mbochs_used_mbytes -= mdev_state->type->mbytes;
- mdev_set_drvdata(mdev, NULL);
+ ret = vfio_register_emulated_iommu_dev(&mdev_state->vdev);
+ if (ret)
+ goto err_put_vdev;
+ dev_set_drvdata(&mdev->dev, mdev_state);
+ return 0;
+
+err_put_vdev:
+ vfio_put_device(&mdev_state->vdev);
+ return ret;
+}
+
+static void mbochs_release_dev(struct vfio_device *vdev)
+{
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
+
+ atomic_add(mdev_state->type->mbytes, &mbochs_avail_mbytes);
kfree(mdev_state->pages);
kfree(mdev_state->vconfig);
- kfree(mdev_state);
- return 0;
+ vfio_free_device(vdev);
}
-static ssize_t mbochs_read(struct mdev_device *mdev, char __user *buf,
+static void mbochs_remove(struct mdev_device *mdev)
+{
+ struct mdev_state *mdev_state = dev_get_drvdata(&mdev->dev);
+
+ vfio_unregister_group_dev(&mdev_state->vdev);
+ vfio_put_device(&mdev_state->vdev);
+}
+
+static ssize_t mbochs_read(struct vfio_device *vdev, char __user *buf,
size_t count, loff_t *ppos)
{
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
unsigned int done = 0;
int ret;
@@ -593,7 +619,7 @@ static ssize_t mbochs_read(struct mdev_device *mdev, char __user *buf,
if (count >= 4 && !(*ppos % 4)) {
u32 val;
- ret = mdev_access(mdev, (char *)&val, sizeof(val),
+ ret = mdev_access(mdev_state, (char *)&val, sizeof(val),
*ppos, false);
if (ret <= 0)
goto read_err;
@@ -605,7 +631,7 @@ static ssize_t mbochs_read(struct mdev_device *mdev, char __user *buf,
} else if (count >= 2 && !(*ppos % 2)) {
u16 val;
- ret = mdev_access(mdev, (char *)&val, sizeof(val),
+ ret = mdev_access(mdev_state, (char *)&val, sizeof(val),
*ppos, false);
if (ret <= 0)
goto read_err;
@@ -617,7 +643,7 @@ static ssize_t mbochs_read(struct mdev_device *mdev, char __user *buf,
} else {
u8 val;
- ret = mdev_access(mdev, (char *)&val, sizeof(val),
+ ret = mdev_access(mdev_state, (char *)&val, sizeof(val),
*ppos, false);
if (ret <= 0)
goto read_err;
@@ -640,9 +666,11 @@ read_err:
return -EFAULT;
}
-static ssize_t mbochs_write(struct mdev_device *mdev, const char __user *buf,
+static ssize_t mbochs_write(struct vfio_device *vdev, const char __user *buf,
size_t count, loff_t *ppos)
{
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
unsigned int done = 0;
int ret;
@@ -655,7 +683,7 @@ static ssize_t mbochs_write(struct mdev_device *mdev, const char __user *buf,
if (copy_from_user(&val, buf, sizeof(val)))
goto write_err;
- ret = mdev_access(mdev, (char *)&val, sizeof(val),
+ ret = mdev_access(mdev_state, (char *)&val, sizeof(val),
*ppos, true);
if (ret <= 0)
goto write_err;
@@ -667,7 +695,7 @@ static ssize_t mbochs_write(struct mdev_device *mdev, const char __user *buf,
if (copy_from_user(&val, buf, sizeof(val)))
goto write_err;
- ret = mdev_access(mdev, (char *)&val, sizeof(val),
+ ret = mdev_access(mdev_state, (char *)&val, sizeof(val),
*ppos, true);
if (ret <= 0)
goto write_err;
@@ -679,7 +707,7 @@ static ssize_t mbochs_write(struct mdev_device *mdev, const char __user *buf,
if (copy_from_user(&val, buf, sizeof(val)))
goto write_err;
- ret = mdev_access(mdev, (char *)&val, sizeof(val),
+ ret = mdev_access(mdev_state, (char *)&val, sizeof(val),
*ppos, true);
if (ret <= 0)
goto write_err;
@@ -765,9 +793,10 @@ static const struct vm_operations_struct mbochs_region_vm_ops = {
.fault = mbochs_region_vm_fault,
};
-static int mbochs_mmap(struct mdev_device *mdev, struct vm_area_struct *vma)
+static int mbochs_mmap(struct vfio_device *vdev, struct vm_area_struct *vma)
{
- struct mdev_state *mdev_state = mdev_get_drvdata(mdev);
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
if (vma->vm_pgoff != MBOCHS_MEMORY_BAR_OFFSET >> PAGE_SHIFT)
return -EINVAL;
@@ -846,7 +875,7 @@ static struct sg_table *mbochs_map_dmabuf(struct dma_buf_attachment *at,
if (sg_alloc_table_from_pages(sg, dmabuf->pages, dmabuf->pagecount,
0, dmabuf->mode.size, GFP_KERNEL) < 0)
goto err2;
- if (!dma_map_sg(at->dev, sg->sgl, sg->nents, direction))
+ if (dma_map_sgtable(at->dev, sg, direction, 0))
goto err3;
return sg;
@@ -868,6 +897,7 @@ static void mbochs_unmap_dmabuf(struct dma_buf_attachment *at,
dev_dbg(dev, "%s: %d\n", __func__, dmabuf->id);
+ dma_unmap_sgtable(at->dev, sg, direction, 0);
sg_free_table(sg);
kfree(sg);
}
@@ -973,7 +1003,7 @@ mbochs_dmabuf_find_by_id(struct mdev_state *mdev_state, u32 id)
static int mbochs_dmabuf_export(struct mbochs_dmabuf *dmabuf)
{
struct mdev_state *mdev_state = dmabuf->mdev_state;
- struct device *dev = mdev_dev(mdev_state->mdev);
+ struct device *dev = mdev_state->vdev.dev;
DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
struct dma_buf *buf;
@@ -1001,15 +1031,10 @@ static int mbochs_dmabuf_export(struct mbochs_dmabuf *dmabuf)
return 0;
}
-static int mbochs_get_region_info(struct mdev_device *mdev,
+static int mbochs_get_region_info(struct mdev_state *mdev_state,
struct vfio_region_info_ext *ext)
{
struct vfio_region_info *region_info = &ext->base;
- struct mdev_state *mdev_state;
-
- mdev_state = mdev_get_drvdata(mdev);
- if (!mdev_state)
- return -EINVAL;
if (region_info->index >= MBOCHS_NUM_REGIONS)
return -EINVAL;
@@ -1057,15 +1082,13 @@ static int mbochs_get_region_info(struct mdev_device *mdev,
return 0;
}
-static int mbochs_get_irq_info(struct mdev_device *mdev,
- struct vfio_irq_info *irq_info)
+static int mbochs_get_irq_info(struct vfio_irq_info *irq_info)
{
irq_info->count = 0;
return 0;
}
-static int mbochs_get_device_info(struct mdev_device *mdev,
- struct vfio_device_info *dev_info)
+static int mbochs_get_device_info(struct vfio_device_info *dev_info)
{
dev_info->flags = VFIO_DEVICE_FLAGS_PCI;
dev_info->num_regions = MBOCHS_NUM_REGIONS;
@@ -1073,11 +1096,9 @@ static int mbochs_get_device_info(struct mdev_device *mdev,
return 0;
}
-static int mbochs_query_gfx_plane(struct mdev_device *mdev,
+static int mbochs_query_gfx_plane(struct mdev_state *mdev_state,
struct vfio_device_gfx_plane_info *plane)
{
- struct mdev_state *mdev_state = mdev_get_drvdata(mdev);
- struct device *dev = mdev_dev(mdev);
struct mbochs_dmabuf *dmabuf;
struct mbochs_mode mode;
int ret;
@@ -1131,18 +1152,16 @@ static int mbochs_query_gfx_plane(struct mdev_device *mdev,
done:
if (plane->drm_plane_type == DRM_PLANE_TYPE_PRIMARY &&
mdev_state->active_id != plane->dmabuf_id) {
- dev_dbg(dev, "%s: primary: %d => %d\n", __func__,
- mdev_state->active_id, plane->dmabuf_id);
+ dev_dbg(mdev_state->vdev.dev, "%s: primary: %d => %d\n",
+ __func__, mdev_state->active_id, plane->dmabuf_id);
mdev_state->active_id = plane->dmabuf_id;
}
mutex_unlock(&mdev_state->ops_lock);
return 0;
}
-static int mbochs_get_gfx_dmabuf(struct mdev_device *mdev,
- u32 id)
+static int mbochs_get_gfx_dmabuf(struct mdev_state *mdev_state, u32 id)
{
- struct mdev_state *mdev_state = mdev_get_drvdata(mdev);
struct mbochs_dmabuf *dmabuf;
mutex_lock(&mdev_state->ops_lock);
@@ -1164,9 +1183,11 @@ static int mbochs_get_gfx_dmabuf(struct mdev_device *mdev,
return dma_buf_fd(dmabuf->buf, 0);
}
-static long mbochs_ioctl(struct mdev_device *mdev, unsigned int cmd,
- unsigned long arg)
+static long mbochs_ioctl(struct vfio_device *vdev, unsigned int cmd,
+ unsigned long arg)
{
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
int ret = 0;
unsigned long minsz, outsz;
@@ -1183,7 +1204,7 @@ static long mbochs_ioctl(struct mdev_device *mdev, unsigned int cmd,
if (info.argsz < minsz)
return -EINVAL;
- ret = mbochs_get_device_info(mdev, &info);
+ ret = mbochs_get_device_info(&info);
if (ret)
return ret;
@@ -1207,7 +1228,7 @@ static long mbochs_ioctl(struct mdev_device *mdev, unsigned int cmd,
if (outsz > sizeof(info))
return -EINVAL;
- ret = mbochs_get_region_info(mdev, &info);
+ ret = mbochs_get_region_info(mdev_state, &info);
if (ret)
return ret;
@@ -1230,7 +1251,7 @@ static long mbochs_ioctl(struct mdev_device *mdev, unsigned int cmd,
(info.index >= VFIO_PCI_NUM_IRQS))
return -EINVAL;
- ret = mbochs_get_irq_info(mdev, &info);
+ ret = mbochs_get_irq_info(&info);
if (ret)
return ret;
@@ -1253,7 +1274,7 @@ static long mbochs_ioctl(struct mdev_device *mdev, unsigned int cmd,
if (plane.argsz < minsz)
return -EINVAL;
- ret = mbochs_query_gfx_plane(mdev, &plane);
+ ret = mbochs_query_gfx_plane(mdev_state, &plane);
if (ret)
return ret;
@@ -1270,29 +1291,22 @@ static long mbochs_ioctl(struct mdev_device *mdev, unsigned int cmd,
if (get_user(dmabuf_id, (__u32 __user *)arg))
return -EFAULT;
- return mbochs_get_gfx_dmabuf(mdev, dmabuf_id);
+ return mbochs_get_gfx_dmabuf(mdev_state, dmabuf_id);
}
case VFIO_DEVICE_SET_IRQS:
return -EINVAL;
case VFIO_DEVICE_RESET:
- return mbochs_reset(mdev);
+ return mbochs_reset(mdev_state);
}
return -ENOTTY;
}
-static int mbochs_open(struct mdev_device *mdev)
-{
- if (!try_module_get(THIS_MODULE))
- return -ENODEV;
-
- return 0;
-}
-
-static void mbochs_close(struct mdev_device *mdev)
+static void mbochs_close_device(struct vfio_device *vdev)
{
- struct mdev_state *mdev_state = mdev_get_drvdata(mdev);
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
struct mbochs_dmabuf *dmabuf, *tmp;
mutex_lock(&mdev_state->ops_lock);
@@ -1309,15 +1323,13 @@ static void mbochs_close(struct mdev_device *mdev)
mbochs_put_pages(mdev_state);
mutex_unlock(&mdev_state->ops_lock);
- module_put(THIS_MODULE);
}
static ssize_t
memory_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
- struct mdev_device *mdev = mdev_from_dev(dev);
- struct mdev_state *mdev_state = mdev_get_drvdata(mdev);
+ struct mdev_state *mdev_state = dev_get_drvdata(dev);
return sprintf(buf, "%d MB\n", mdev_state->type->mbytes);
}
@@ -1333,87 +1345,50 @@ static const struct attribute_group mdev_dev_group = {
.attrs = mdev_dev_attrs,
};
-const struct attribute_group *mdev_dev_groups[] = {
+static const struct attribute_group *mdev_dev_groups[] = {
&mdev_dev_group,
NULL,
};
-static ssize_t
-name_show(struct kobject *kobj, struct device *dev, char *buf)
+static ssize_t mbochs_show_description(struct mdev_type *mtype, char *buf)
{
- return sprintf(buf, "%s\n", kobj->name);
-}
-MDEV_TYPE_ATTR_RO(name);
-
-static ssize_t
-description_show(struct kobject *kobj, struct device *dev, char *buf)
-{
- const struct mbochs_type *type = mbochs_find_type(kobj);
+ struct mbochs_type *type =
+ container_of(mtype, struct mbochs_type, type);
return sprintf(buf, "virtual display, %d MB video memory\n",
type ? type->mbytes : 0);
}
-MDEV_TYPE_ATTR_RO(description);
-static ssize_t
-available_instances_show(struct kobject *kobj, struct device *dev, char *buf)
+static unsigned int mbochs_get_available(struct mdev_type *mtype)
{
- const struct mbochs_type *type = mbochs_find_type(kobj);
- int count = (max_mbytes - mbochs_used_mbytes) / type->mbytes;
-
- return sprintf(buf, "%d\n", count);
-}
-MDEV_TYPE_ATTR_RO(available_instances);
+ struct mbochs_type *type =
+ container_of(mtype, struct mbochs_type, type);
-static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
- char *buf)
-{
- return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING);
+ return atomic_read(&mbochs_avail_mbytes) / type->mbytes;
}
-MDEV_TYPE_ATTR_RO(device_api);
-
-static struct attribute *mdev_types_attrs[] = {
- &mdev_type_attr_name.attr,
- &mdev_type_attr_description.attr,
- &mdev_type_attr_device_api.attr,
- &mdev_type_attr_available_instances.attr,
- NULL,
-};
-
-static struct attribute_group mdev_type_group1 = {
- .name = MBOCHS_TYPE_1,
- .attrs = mdev_types_attrs,
-};
-
-static struct attribute_group mdev_type_group2 = {
- .name = MBOCHS_TYPE_2,
- .attrs = mdev_types_attrs,
-};
-static struct attribute_group mdev_type_group3 = {
- .name = MBOCHS_TYPE_3,
- .attrs = mdev_types_attrs,
+static const struct vfio_device_ops mbochs_dev_ops = {
+ .close_device = mbochs_close_device,
+ .init = mbochs_init_dev,
+ .release = mbochs_release_dev,
+ .read = mbochs_read,
+ .write = mbochs_write,
+ .ioctl = mbochs_ioctl,
+ .mmap = mbochs_mmap,
};
-static struct attribute_group *mdev_type_groups[] = {
- &mdev_type_group1,
- &mdev_type_group2,
- &mdev_type_group3,
- NULL,
-};
-
-static const struct mdev_parent_ops mdev_fops = {
- .owner = THIS_MODULE,
- .mdev_attr_groups = mdev_dev_groups,
- .supported_type_groups = mdev_type_groups,
- .create = mbochs_create,
- .remove = mbochs_remove,
- .open = mbochs_open,
- .release = mbochs_close,
- .read = mbochs_read,
- .write = mbochs_write,
- .ioctl = mbochs_ioctl,
- .mmap = mbochs_mmap,
+static struct mdev_driver mbochs_driver = {
+ .device_api = VFIO_DEVICE_API_PCI_STRING,
+ .driver = {
+ .name = "mbochs",
+ .owner = THIS_MODULE,
+ .mod_name = KBUILD_MODNAME,
+ .dev_groups = mdev_dev_groups,
+ },
+ .probe = mbochs_probe,
+ .remove = mbochs_remove,
+ .get_available = mbochs_get_available,
+ .show_description = mbochs_show_description,
};
static const struct file_operations vd_fops = {
@@ -1429,6 +1404,8 @@ static int __init mbochs_dev_init(void)
{
int ret = 0;
+ atomic_set(&mbochs_avail_mbytes, max_mbytes);
+
ret = alloc_chrdev_region(&mbochs_devt, 0, MINORMASK + 1, MBOCHS_NAME);
if (ret < 0) {
pr_err("Error: failed to register mbochs_dev, err: %d\n", ret);
@@ -1438,11 +1415,15 @@ static int __init mbochs_dev_init(void)
cdev_add(&mbochs_cdev, mbochs_devt, MINORMASK + 1);
pr_info("%s: major %d\n", __func__, MAJOR(mbochs_devt));
+ ret = mdev_register_driver(&mbochs_driver);
+ if (ret)
+ goto err_cdev;
+
mbochs_class = class_create(THIS_MODULE, MBOCHS_CLASS_NAME);
if (IS_ERR(mbochs_class)) {
pr_err("Error: failed to register mbochs_dev class\n");
ret = PTR_ERR(mbochs_class);
- goto failed1;
+ goto err_driver;
}
mbochs_dev.class = mbochs_class;
mbochs_dev.release = mbochs_device_release;
@@ -1450,19 +1431,23 @@ static int __init mbochs_dev_init(void)
ret = device_register(&mbochs_dev);
if (ret)
- goto failed2;
+ goto err_class;
- ret = mdev_register_device(&mbochs_dev, &mdev_fops);
+ ret = mdev_register_parent(&mbochs_parent, &mbochs_dev, &mbochs_driver,
+ mbochs_mdev_types,
+ ARRAY_SIZE(mbochs_mdev_types));
if (ret)
- goto failed3;
+ goto err_device;
return 0;
-failed3:
+err_device:
device_unregister(&mbochs_dev);
-failed2:
+err_class:
class_destroy(mbochs_class);
-failed1:
+err_driver:
+ mdev_unregister_driver(&mbochs_driver);
+err_cdev:
cdev_del(&mbochs_cdev);
unregister_chrdev_region(mbochs_devt, MINORMASK + 1);
return ret;
@@ -1471,14 +1456,16 @@ failed1:
static void __exit mbochs_dev_exit(void)
{
mbochs_dev.bus = NULL;
- mdev_unregister_device(&mbochs_dev);
+ mdev_unregister_parent(&mbochs_parent);
device_unregister(&mbochs_dev);
+ mdev_unregister_driver(&mbochs_driver);
cdev_del(&mbochs_cdev);
unregister_chrdev_region(mbochs_devt, MINORMASK + 1);
class_destroy(mbochs_class);
mbochs_class = NULL;
}
+MODULE_IMPORT_NS(DMA_BUF);
module_init(mbochs_dev_init)
module_exit(mbochs_dev_exit)
diff --git a/samples/vfio-mdev/mdpy-defs.h b/samples/vfio-mdev/mdpy-defs.h
index eb26421b6429..961c55ec3ffd 100644
--- a/samples/vfio-mdev/mdpy-defs.h
+++ b/samples/vfio-mdev/mdpy-defs.h
@@ -9,7 +9,7 @@
*/
/* pci ids */
-#define MDPY_PCI_VENDOR_ID 0x1b36 /* redhat */
+#define MDPY_PCI_VENDOR_ID PCI_VENDOR_ID_REDHAT
#define MDPY_PCI_DEVICE_ID 0x000f
#define MDPY_PCI_SUBVENDOR_ID PCI_SUBVENDOR_ID_REDHAT_QUMRANET
#define MDPY_PCI_SUBDEVICE_ID PCI_SUBDEVICE_ID_QEMU
diff --git a/samples/vfio-mdev/mdpy-fb.c b/samples/vfio-mdev/mdpy-fb.c
index 21dbf63d6e41..9ec93d90e8a5 100644
--- a/samples/vfio-mdev/mdpy-fb.c
+++ b/samples/vfio-mdev/mdpy-fb.c
@@ -117,22 +117,27 @@ static int mdpy_fb_probe(struct pci_dev *pdev,
if (format != DRM_FORMAT_XRGB8888) {
pci_err(pdev, "format mismatch (0x%x != 0x%x)\n",
format, DRM_FORMAT_XRGB8888);
- return -EINVAL;
+ ret = -EINVAL;
+ goto err_release_regions;
}
if (width < 100 || width > 10000) {
pci_err(pdev, "width (%d) out of range\n", width);
- return -EINVAL;
+ ret = -EINVAL;
+ goto err_release_regions;
}
if (height < 100 || height > 10000) {
pci_err(pdev, "height (%d) out of range\n", height);
- return -EINVAL;
+ ret = -EINVAL;
+ goto err_release_regions;
}
pci_info(pdev, "mdpy found: %dx%d framebuffer\n",
width, height);
info = framebuffer_alloc(sizeof(struct mdpy_fb_par), &pdev->dev);
- if (!info)
+ if (!info) {
+ ret = -ENOMEM;
goto err_release_regions;
+ }
pci_set_drvdata(pdev, info);
par = info->par;
diff --git a/samples/vfio-mdev/mdpy.c b/samples/vfio-mdev/mdpy.c
index cc86bf6566e4..946e8cfde6fd 100644
--- a/samples/vfio-mdev/mdpy.c
+++ b/samples/vfio-mdev/mdpy.c
@@ -17,7 +17,6 @@
*/
#include <linux/init.h>
#include <linux/module.h>
-#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
@@ -43,36 +42,34 @@
MODULE_LICENSE("GPL v2");
-static int max_devices = 4;
-module_param_named(count, max_devices, int, 0444);
-MODULE_PARM_DESC(count, "number of " MDPY_NAME " devices");
-
-
#define MDPY_TYPE_1 "vga"
#define MDPY_TYPE_2 "xga"
#define MDPY_TYPE_3 "hd"
-static const struct mdpy_type {
- const char *name;
+static struct mdpy_type {
+ struct mdev_type type;
u32 format;
u32 bytepp;
u32 width;
u32 height;
} mdpy_types[] = {
{
- .name = MDPY_CLASS_NAME "-" MDPY_TYPE_1,
+ .type.sysfs_name = MDPY_TYPE_1,
+ .type.pretty_name = MDPY_CLASS_NAME "-" MDPY_TYPE_1,
.format = DRM_FORMAT_XRGB8888,
.bytepp = 4,
.width = 640,
.height = 480,
}, {
- .name = MDPY_CLASS_NAME "-" MDPY_TYPE_2,
+ .type.sysfs_name = MDPY_TYPE_2,
+ .type.pretty_name = MDPY_CLASS_NAME "-" MDPY_TYPE_2,
.format = DRM_FORMAT_XRGB8888,
.bytepp = 4,
.width = 1024,
.height = 768,
}, {
- .name = MDPY_CLASS_NAME "-" MDPY_TYPE_3,
+ .type.sysfs_name = MDPY_TYPE_3,
+ .type.pretty_name = MDPY_CLASS_NAME "-" MDPY_TYPE_3,
.format = DRM_FORMAT_XRGB8888,
.bytepp = 4,
.width = 1920,
@@ -80,14 +77,22 @@ static const struct mdpy_type {
},
};
+static struct mdev_type *mdpy_mdev_types[] = {
+ &mdpy_types[0].type,
+ &mdpy_types[1].type,
+ &mdpy_types[2].type,
+};
+
static dev_t mdpy_devt;
static struct class *mdpy_class;
static struct cdev mdpy_cdev;
static struct device mdpy_dev;
-static u32 mdpy_count;
+static struct mdev_parent mdpy_parent;
+static const struct vfio_device_ops mdpy_dev_ops;
/* State of each mdev device */
struct mdev_state {
+ struct vfio_device vdev;
u8 *vconfig;
u32 bar_mask;
struct mutex ops_lock;
@@ -99,16 +104,6 @@ struct mdev_state {
void *memblk;
};
-static const struct mdpy_type *mdpy_find_type(struct kobject *kobj)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(mdpy_types); i++)
- if (strcmp(mdpy_types[i].name, kobj->name) == 0)
- return mdpy_types + i;
- return NULL;
-}
-
static void mdpy_create_config_space(struct mdev_state *mdev_state)
{
STORE_LE16((u16 *) &mdev_state->vconfig[PCI_VENDOR_ID],
@@ -172,11 +167,9 @@ static void handle_pci_cfg_write(struct mdev_state *mdev_state, u16 offset,
}
}
-static ssize_t mdev_access(struct mdev_device *mdev, char *buf, size_t count,
- loff_t pos, bool is_write)
+static ssize_t mdev_access(struct mdev_state *mdev_state, char *buf,
+ size_t count, loff_t pos, bool is_write)
{
- struct mdev_state *mdev_state = mdev_get_drvdata(mdev);
- struct device *dev = mdev_dev(mdev);
int ret = 0;
mutex_lock(&mdev_state->ops_lock);
@@ -197,8 +190,9 @@ static ssize_t mdev_access(struct mdev_device *mdev, char *buf, size_t count,
memcpy(buf, mdev_state->memblk, count);
} else {
- dev_info(dev, "%s: %s @0x%llx (unhandled)\n",
- __func__, is_write ? "WR" : "RD", pos);
+ dev_info(mdev_state->vdev.dev,
+ "%s: %s @0x%llx (unhandled)\n", __func__,
+ is_write ? "WR" : "RD", pos);
ret = -1;
goto accessfailed;
}
@@ -212,9 +206,8 @@ accessfailed:
return ret;
}
-static int mdpy_reset(struct mdev_device *mdev)
+static int mdpy_reset(struct mdev_state *mdev_state)
{
- struct mdev_state *mdev_state = mdev_get_drvdata(mdev);
u32 stride, i;
/* initialize with gray gradient */
@@ -226,71 +219,88 @@ static int mdpy_reset(struct mdev_device *mdev)
return 0;
}
-static int mdpy_create(struct kobject *kobj, struct mdev_device *mdev)
+static int mdpy_init_dev(struct vfio_device *vdev)
{
- const struct mdpy_type *type = mdpy_find_type(kobj);
- struct device *dev = mdev_dev(mdev);
- struct mdev_state *mdev_state;
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
+ struct mdev_device *mdev = to_mdev_device(vdev->dev);
+ const struct mdpy_type *type =
+ container_of(mdev->type, struct mdpy_type, type);
u32 fbsize;
-
- if (mdpy_count >= max_devices)
- return -ENOMEM;
-
- mdev_state = kzalloc(sizeof(struct mdev_state), GFP_KERNEL);
- if (mdev_state == NULL)
- return -ENOMEM;
+ int ret = -ENOMEM;
mdev_state->vconfig = kzalloc(MDPY_CONFIG_SPACE_SIZE, GFP_KERNEL);
- if (mdev_state->vconfig == NULL) {
- kfree(mdev_state);
- return -ENOMEM;
- }
+ if (!mdev_state->vconfig)
+ return ret;
- if (!type)
- type = &mdpy_types[0];
fbsize = roundup_pow_of_two(type->width * type->height * type->bytepp);
mdev_state->memblk = vmalloc_user(fbsize);
- if (!mdev_state->memblk) {
- kfree(mdev_state->vconfig);
- kfree(mdev_state);
- return -ENOMEM;
- }
- dev_info(dev, "%s: %s (%dx%d)\n",
- __func__, kobj->name, type->width, type->height);
+ if (!mdev_state->memblk)
+ goto out_vconfig;
mutex_init(&mdev_state->ops_lock);
mdev_state->mdev = mdev;
- mdev_set_drvdata(mdev, mdev_state);
-
- mdev_state->type = type;
+ mdev_state->type = type;
mdev_state->memsize = fbsize;
mdpy_create_config_space(mdev_state);
- mdpy_reset(mdev);
+ mdpy_reset(mdev_state);
- mdpy_count++;
+ dev_info(vdev->dev, "%s: %s (%dx%d)\n", __func__, type->type.pretty_name,
+ type->width, type->height);
return 0;
+
+out_vconfig:
+ kfree(mdev_state->vconfig);
+ return ret;
}
-static int mdpy_remove(struct mdev_device *mdev)
+static int mdpy_probe(struct mdev_device *mdev)
{
- struct mdev_state *mdev_state = mdev_get_drvdata(mdev);
- struct device *dev = mdev_dev(mdev);
+ struct mdev_state *mdev_state;
+ int ret;
+
+ mdev_state = vfio_alloc_device(mdev_state, vdev, &mdev->dev,
+ &mdpy_dev_ops);
+ if (IS_ERR(mdev_state))
+ return PTR_ERR(mdev_state);
+
+ ret = vfio_register_emulated_iommu_dev(&mdev_state->vdev);
+ if (ret)
+ goto err_put_vdev;
+ dev_set_drvdata(&mdev->dev, mdev_state);
+ return 0;
- dev_info(dev, "%s\n", __func__);
+err_put_vdev:
+ vfio_put_device(&mdev_state->vdev);
+ return ret;
+}
+
+static void mdpy_release_dev(struct vfio_device *vdev)
+{
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
- mdev_set_drvdata(mdev, NULL);
vfree(mdev_state->memblk);
kfree(mdev_state->vconfig);
- kfree(mdev_state);
+ vfio_free_device(vdev);
+}
- mdpy_count--;
- return 0;
+static void mdpy_remove(struct mdev_device *mdev)
+{
+ struct mdev_state *mdev_state = dev_get_drvdata(&mdev->dev);
+
+ dev_info(&mdev->dev, "%s\n", __func__);
+
+ vfio_unregister_group_dev(&mdev_state->vdev);
+ vfio_put_device(&mdev_state->vdev);
}
-static ssize_t mdpy_read(struct mdev_device *mdev, char __user *buf,
+static ssize_t mdpy_read(struct vfio_device *vdev, char __user *buf,
size_t count, loff_t *ppos)
{
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
unsigned int done = 0;
int ret;
@@ -300,8 +310,8 @@ static ssize_t mdpy_read(struct mdev_device *mdev, char __user *buf,
if (count >= 4 && !(*ppos % 4)) {
u32 val;
- ret = mdev_access(mdev, (char *)&val, sizeof(val),
- *ppos, false);
+ ret = mdev_access(mdev_state, (char *)&val, sizeof(val),
+ *ppos, false);
if (ret <= 0)
goto read_err;
@@ -312,7 +322,7 @@ static ssize_t mdpy_read(struct mdev_device *mdev, char __user *buf,
} else if (count >= 2 && !(*ppos % 2)) {
u16 val;
- ret = mdev_access(mdev, (char *)&val, sizeof(val),
+ ret = mdev_access(mdev_state, (char *)&val, sizeof(val),
*ppos, false);
if (ret <= 0)
goto read_err;
@@ -324,7 +334,7 @@ static ssize_t mdpy_read(struct mdev_device *mdev, char __user *buf,
} else {
u8 val;
- ret = mdev_access(mdev, (char *)&val, sizeof(val),
+ ret = mdev_access(mdev_state, (char *)&val, sizeof(val),
*ppos, false);
if (ret <= 0)
goto read_err;
@@ -347,9 +357,11 @@ read_err:
return -EFAULT;
}
-static ssize_t mdpy_write(struct mdev_device *mdev, const char __user *buf,
+static ssize_t mdpy_write(struct vfio_device *vdev, const char __user *buf,
size_t count, loff_t *ppos)
{
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
unsigned int done = 0;
int ret;
@@ -362,7 +374,7 @@ static ssize_t mdpy_write(struct mdev_device *mdev, const char __user *buf,
if (copy_from_user(&val, buf, sizeof(val)))
goto write_err;
- ret = mdev_access(mdev, (char *)&val, sizeof(val),
+ ret = mdev_access(mdev_state, (char *)&val, sizeof(val),
*ppos, true);
if (ret <= 0)
goto write_err;
@@ -374,7 +386,7 @@ static ssize_t mdpy_write(struct mdev_device *mdev, const char __user *buf,
if (copy_from_user(&val, buf, sizeof(val)))
goto write_err;
- ret = mdev_access(mdev, (char *)&val, sizeof(val),
+ ret = mdev_access(mdev_state, (char *)&val, sizeof(val),
*ppos, true);
if (ret <= 0)
goto write_err;
@@ -386,7 +398,7 @@ static ssize_t mdpy_write(struct mdev_device *mdev, const char __user *buf,
if (copy_from_user(&val, buf, sizeof(val)))
goto write_err;
- ret = mdev_access(mdev, (char *)&val, sizeof(val),
+ ret = mdev_access(mdev_state, (char *)&val, sizeof(val),
*ppos, true);
if (ret <= 0)
goto write_err;
@@ -404,9 +416,10 @@ write_err:
return -EFAULT;
}
-static int mdpy_mmap(struct mdev_device *mdev, struct vm_area_struct *vma)
+static int mdpy_mmap(struct vfio_device *vdev, struct vm_area_struct *vma)
{
- struct mdev_state *mdev_state = mdev_get_drvdata(mdev);
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
if (vma->vm_pgoff != MDPY_MEMORY_BAR_OFFSET >> PAGE_SHIFT)
return -EINVAL;
@@ -417,21 +430,13 @@ static int mdpy_mmap(struct mdev_device *mdev, struct vm_area_struct *vma)
if ((vma->vm_flags & VM_SHARED) == 0)
return -EINVAL;
- return remap_vmalloc_range_partial(vma, vma->vm_start,
- mdev_state->memblk,
- vma->vm_end - vma->vm_start);
+ return remap_vmalloc_range(vma, mdev_state->memblk, 0);
}
-static int mdpy_get_region_info(struct mdev_device *mdev,
+static int mdpy_get_region_info(struct mdev_state *mdev_state,
struct vfio_region_info *region_info,
u16 *cap_type_id, void **cap_type)
{
- struct mdev_state *mdev_state;
-
- mdev_state = mdev_get_drvdata(mdev);
- if (!mdev_state)
- return -EINVAL;
-
if (region_info->index >= VFIO_PCI_NUM_REGIONS &&
region_info->index != MDPY_DISPLAY_REGION)
return -EINVAL;
@@ -460,15 +465,13 @@ static int mdpy_get_region_info(struct mdev_device *mdev,
return 0;
}
-static int mdpy_get_irq_info(struct mdev_device *mdev,
- struct vfio_irq_info *irq_info)
+static int mdpy_get_irq_info(struct vfio_irq_info *irq_info)
{
irq_info->count = 0;
return 0;
}
-static int mdpy_get_device_info(struct mdev_device *mdev,
- struct vfio_device_info *dev_info)
+static int mdpy_get_device_info(struct vfio_device_info *dev_info)
{
dev_info->flags = VFIO_DEVICE_FLAGS_PCI;
dev_info->num_regions = VFIO_PCI_NUM_REGIONS;
@@ -476,11 +479,9 @@ static int mdpy_get_device_info(struct mdev_device *mdev,
return 0;
}
-static int mdpy_query_gfx_plane(struct mdev_device *mdev,
+static int mdpy_query_gfx_plane(struct mdev_state *mdev_state,
struct vfio_device_gfx_plane_info *plane)
{
- struct mdev_state *mdev_state = mdev_get_drvdata(mdev);
-
if (plane->flags & VFIO_GFX_PLANE_TYPE_PROBE) {
if (plane->flags == (VFIO_GFX_PLANE_TYPE_PROBE |
VFIO_GFX_PLANE_TYPE_REGION))
@@ -509,14 +510,13 @@ static int mdpy_query_gfx_plane(struct mdev_device *mdev,
return 0;
}
-static long mdpy_ioctl(struct mdev_device *mdev, unsigned int cmd,
+static long mdpy_ioctl(struct vfio_device *vdev, unsigned int cmd,
unsigned long arg)
{
int ret = 0;
unsigned long minsz;
- struct mdev_state *mdev_state;
-
- mdev_state = mdev_get_drvdata(mdev);
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
switch (cmd) {
case VFIO_DEVICE_GET_INFO:
@@ -531,7 +531,7 @@ static long mdpy_ioctl(struct mdev_device *mdev, unsigned int cmd,
if (info.argsz < minsz)
return -EINVAL;
- ret = mdpy_get_device_info(mdev, &info);
+ ret = mdpy_get_device_info(&info);
if (ret)
return ret;
@@ -556,7 +556,7 @@ static long mdpy_ioctl(struct mdev_device *mdev, unsigned int cmd,
if (info.argsz < minsz)
return -EINVAL;
- ret = mdpy_get_region_info(mdev, &info, &cap_type_id,
+ ret = mdpy_get_region_info(mdev_state, &info, &cap_type_id,
&cap_type);
if (ret)
return ret;
@@ -580,7 +580,7 @@ static long mdpy_ioctl(struct mdev_device *mdev, unsigned int cmd,
(info.index >= mdev_state->dev_info.num_irqs))
return -EINVAL;
- ret = mdpy_get_irq_info(mdev, &info);
+ ret = mdpy_get_irq_info(&info);
if (ret)
return ret;
@@ -603,7 +603,7 @@ static long mdpy_ioctl(struct mdev_device *mdev, unsigned int cmd,
if (plane.argsz < minsz)
return -EINVAL;
- ret = mdpy_query_gfx_plane(mdev, &plane);
+ ret = mdpy_query_gfx_plane(mdev_state, &plane);
if (ret)
return ret;
@@ -617,30 +617,16 @@ static long mdpy_ioctl(struct mdev_device *mdev, unsigned int cmd,
return -EINVAL;
case VFIO_DEVICE_RESET:
- return mdpy_reset(mdev);
+ return mdpy_reset(mdev_state);
}
return -ENOTTY;
}
-static int mdpy_open(struct mdev_device *mdev)
-{
- if (!try_module_get(THIS_MODULE))
- return -ENODEV;
-
- return 0;
-}
-
-static void mdpy_close(struct mdev_device *mdev)
-{
- module_put(THIS_MODULE);
-}
-
static ssize_t
resolution_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
- struct mdev_device *mdev = mdev_from_dev(dev);
- struct mdev_state *mdev_state = mdev_get_drvdata(mdev);
+ struct mdev_state *mdev_state = dev_get_drvdata(dev);
return sprintf(buf, "%dx%d\n",
mdev_state->type->width,
@@ -658,85 +644,40 @@ static const struct attribute_group mdev_dev_group = {
.attrs = mdev_dev_attrs,
};
-const struct attribute_group *mdev_dev_groups[] = {
+static const struct attribute_group *mdev_dev_groups[] = {
&mdev_dev_group,
NULL,
};
-static ssize_t
-name_show(struct kobject *kobj, struct device *dev, char *buf)
+static ssize_t mdpy_show_description(struct mdev_type *mtype, char *buf)
{
- return sprintf(buf, "%s\n", kobj->name);
-}
-MDEV_TYPE_ATTR_RO(name);
-
-static ssize_t
-description_show(struct kobject *kobj, struct device *dev, char *buf)
-{
- const struct mdpy_type *type = mdpy_find_type(kobj);
+ struct mdpy_type *type = container_of(mtype, struct mdpy_type, type);
return sprintf(buf, "virtual display, %dx%d framebuffer\n",
- type ? type->width : 0,
- type ? type->height : 0);
-}
-MDEV_TYPE_ATTR_RO(description);
-
-static ssize_t
-available_instances_show(struct kobject *kobj, struct device *dev, char *buf)
-{
- return sprintf(buf, "%d\n", max_devices - mdpy_count);
+ type->width, type->height);
}
-MDEV_TYPE_ATTR_RO(available_instances);
-
-static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
- char *buf)
-{
- return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING);
-}
-MDEV_TYPE_ATTR_RO(device_api);
-
-static struct attribute *mdev_types_attrs[] = {
- &mdev_type_attr_name.attr,
- &mdev_type_attr_description.attr,
- &mdev_type_attr_device_api.attr,
- &mdev_type_attr_available_instances.attr,
- NULL,
-};
-static struct attribute_group mdev_type_group1 = {
- .name = MDPY_TYPE_1,
- .attrs = mdev_types_attrs,
+static const struct vfio_device_ops mdpy_dev_ops = {
+ .init = mdpy_init_dev,
+ .release = mdpy_release_dev,
+ .read = mdpy_read,
+ .write = mdpy_write,
+ .ioctl = mdpy_ioctl,
+ .mmap = mdpy_mmap,
};
-static struct attribute_group mdev_type_group2 = {
- .name = MDPY_TYPE_2,
- .attrs = mdev_types_attrs,
-};
-
-static struct attribute_group mdev_type_group3 = {
- .name = MDPY_TYPE_3,
- .attrs = mdev_types_attrs,
-};
-
-static struct attribute_group *mdev_type_groups[] = {
- &mdev_type_group1,
- &mdev_type_group2,
- &mdev_type_group3,
- NULL,
-};
-
-static const struct mdev_parent_ops mdev_fops = {
- .owner = THIS_MODULE,
- .mdev_attr_groups = mdev_dev_groups,
- .supported_type_groups = mdev_type_groups,
- .create = mdpy_create,
- .remove = mdpy_remove,
- .open = mdpy_open,
- .release = mdpy_close,
- .read = mdpy_read,
- .write = mdpy_write,
- .ioctl = mdpy_ioctl,
- .mmap = mdpy_mmap,
+static struct mdev_driver mdpy_driver = {
+ .device_api = VFIO_DEVICE_API_PCI_STRING,
+ .max_instances = 4,
+ .driver = {
+ .name = "mdpy",
+ .owner = THIS_MODULE,
+ .mod_name = KBUILD_MODNAME,
+ .dev_groups = mdev_dev_groups,
+ },
+ .probe = mdpy_probe,
+ .remove = mdpy_remove,
+ .show_description = mdpy_show_description,
};
static const struct file_operations vd_fops = {
@@ -761,11 +702,15 @@ static int __init mdpy_dev_init(void)
cdev_add(&mdpy_cdev, mdpy_devt, MINORMASK + 1);
pr_info("%s: major %d\n", __func__, MAJOR(mdpy_devt));
+ ret = mdev_register_driver(&mdpy_driver);
+ if (ret)
+ goto err_cdev;
+
mdpy_class = class_create(THIS_MODULE, MDPY_CLASS_NAME);
if (IS_ERR(mdpy_class)) {
pr_err("Error: failed to register mdpy_dev class\n");
ret = PTR_ERR(mdpy_class);
- goto failed1;
+ goto err_driver;
}
mdpy_dev.class = mdpy_class;
mdpy_dev.release = mdpy_device_release;
@@ -773,19 +718,23 @@ static int __init mdpy_dev_init(void)
ret = device_register(&mdpy_dev);
if (ret)
- goto failed2;
+ goto err_class;
- ret = mdev_register_device(&mdpy_dev, &mdev_fops);
+ ret = mdev_register_parent(&mdpy_parent, &mdpy_dev, &mdpy_driver,
+ mdpy_mdev_types,
+ ARRAY_SIZE(mdpy_mdev_types));
if (ret)
- goto failed3;
+ goto err_device;
return 0;
-failed3:
+err_device:
device_unregister(&mdpy_dev);
-failed2:
+err_class:
class_destroy(mdpy_class);
-failed1:
+err_driver:
+ mdev_unregister_driver(&mdpy_driver);
+err_cdev:
cdev_del(&mdpy_cdev);
unregister_chrdev_region(mdpy_devt, MINORMASK + 1);
return ret;
@@ -794,14 +743,18 @@ failed1:
static void __exit mdpy_dev_exit(void)
{
mdpy_dev.bus = NULL;
- mdev_unregister_device(&mdpy_dev);
+ mdev_unregister_parent(&mdpy_parent);
device_unregister(&mdpy_dev);
+ mdev_unregister_driver(&mdpy_driver);
cdev_del(&mdpy_cdev);
unregister_chrdev_region(mdpy_devt, MINORMASK + 1);
class_destroy(mdpy_class);
mdpy_class = NULL;
}
+module_param_named(count, mdpy_driver.max_instances, int, 0444);
+MODULE_PARM_DESC(count, "number of " MDPY_NAME " devices");
+
module_init(mdpy_dev_init)
module_exit(mdpy_dev_exit)
diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c
index ce84a300a4da..e72085fc1376 100644
--- a/samples/vfio-mdev/mtty.c
+++ b/samples/vfio-mdev/mtty.c
@@ -12,7 +12,6 @@
#include <linux/init.h>
#include <linux/module.h>
-#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/poll.h>
@@ -20,7 +19,6 @@
#include <linux/cdev.h>
#include <linux/sched.h>
#include <linux/wait.h>
-#include <linux/uuid.h>
#include <linux/vfio.h>
#include <linux/iommu.h>
#include <linux/sysfs.h>
@@ -74,6 +72,7 @@ static struct mtty_dev {
struct cdev vd_cdev;
struct idr vd_idr;
struct device dev;
+ struct mdev_parent parent;
} mtty_dev;
struct mdev_region_info {
@@ -127,6 +126,7 @@ struct serial_port {
/* State of each mdev device */
struct mdev_state {
+ struct vfio_device vdev;
int irq_fd;
struct eventfd_ctx *intx_evtfd;
struct eventfd_ctx *msi_evtfd;
@@ -143,13 +143,29 @@ struct mdev_state {
int nr_ports;
};
-static struct mutex mdev_list_lock;
-static struct list_head mdev_devices_list;
+static struct mtty_type {
+ struct mdev_type type;
+ int nr_ports;
+} mtty_types[2] = {
+ { .nr_ports = 1, .type.sysfs_name = "1",
+ .type.pretty_name = "Single port serial" },
+ { .nr_ports = 2, .type.sysfs_name = "2",
+ .type.pretty_name = "Dual port serial" },
+};
+
+static struct mdev_type *mtty_mdev_types[] = {
+ &mtty_types[0].type,
+ &mtty_types[1].type,
+};
+
+static atomic_t mdev_avail_ports = ATOMIC_INIT(MAX_MTTYS);
static const struct file_operations vd_fops = {
.owner = THIS_MODULE,
};
+static const struct vfio_device_ops mtty_dev_ops;
+
/* function prototypes */
static int mtty_trigger_interrupt(struct mdev_state *mdev_state);
@@ -631,23 +647,16 @@ static void mdev_read_base(struct mdev_state *mdev_state)
}
}
-static ssize_t mdev_access(struct mdev_device *mdev, u8 *buf, size_t count,
+static ssize_t mdev_access(struct mdev_state *mdev_state, u8 *buf, size_t count,
loff_t pos, bool is_write)
{
- struct mdev_state *mdev_state;
unsigned int index;
loff_t offset;
int ret = 0;
- if (!mdev || !buf)
+ if (!buf)
return -EINVAL;
- mdev_state = mdev_get_drvdata(mdev);
- if (!mdev_state) {
- pr_err("%s mdev_state not found\n", __func__);
- return -EINVAL;
- }
-
mutex_lock(&mdev_state->ops_lock);
index = MTTY_VFIO_PCI_OFFSET_TO_INDEX(pos);
@@ -708,97 +717,96 @@ accessfailed:
return ret;
}
-static int mtty_create(struct kobject *kobj, struct mdev_device *mdev)
+static int mtty_init_dev(struct vfio_device *vdev)
{
- struct mdev_state *mdev_state;
- char name[MTTY_STRING_LEN];
- int nr_ports = 0, i;
-
- if (!mdev)
- return -EINVAL;
-
- for (i = 0; i < 2; i++) {
- snprintf(name, MTTY_STRING_LEN, "%s-%d",
- dev_driver_string(mdev_parent_dev(mdev)), i + 1);
- if (!strcmp(kobj->name, name)) {
- nr_ports = i + 1;
- break;
- }
- }
-
- if (!nr_ports)
- return -EINVAL;
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
+ struct mdev_device *mdev = to_mdev_device(vdev->dev);
+ struct mtty_type *type =
+ container_of(mdev->type, struct mtty_type, type);
+ int avail_ports = atomic_read(&mdev_avail_ports);
+ int ret;
- mdev_state = kzalloc(sizeof(struct mdev_state), GFP_KERNEL);
- if (mdev_state == NULL)
- return -ENOMEM;
+ do {
+ if (avail_ports < type->nr_ports)
+ return -ENOSPC;
+ } while (!atomic_try_cmpxchg(&mdev_avail_ports,
+ &avail_ports,
+ avail_ports - type->nr_ports));
- mdev_state->nr_ports = nr_ports;
+ mdev_state->nr_ports = type->nr_ports;
mdev_state->irq_index = -1;
mdev_state->s[0].max_fifo_size = MAX_FIFO_SIZE;
mdev_state->s[1].max_fifo_size = MAX_FIFO_SIZE;
mutex_init(&mdev_state->rxtx_lock);
- mdev_state->vconfig = kzalloc(MTTY_CONFIG_SPACE_SIZE, GFP_KERNEL);
- if (mdev_state->vconfig == NULL) {
- kfree(mdev_state);
- return -ENOMEM;
+ mdev_state->vconfig = kzalloc(MTTY_CONFIG_SPACE_SIZE, GFP_KERNEL);
+ if (!mdev_state->vconfig) {
+ ret = -ENOMEM;
+ goto err_nr_ports;
}
mutex_init(&mdev_state->ops_lock);
mdev_state->mdev = mdev;
- mdev_set_drvdata(mdev, mdev_state);
-
mtty_create_config_space(mdev_state);
-
- mutex_lock(&mdev_list_lock);
- list_add(&mdev_state->next, &mdev_devices_list);
- mutex_unlock(&mdev_list_lock);
-
return 0;
+
+err_nr_ports:
+ atomic_add(type->nr_ports, &mdev_avail_ports);
+ return ret;
}
-static int mtty_remove(struct mdev_device *mdev)
+static int mtty_probe(struct mdev_device *mdev)
{
- struct mdev_state *mds, *tmp_mds;
- struct mdev_state *mdev_state = mdev_get_drvdata(mdev);
- int ret = -EINVAL;
-
- mutex_lock(&mdev_list_lock);
- list_for_each_entry_safe(mds, tmp_mds, &mdev_devices_list, next) {
- if (mdev_state == mds) {
- list_del(&mdev_state->next);
- mdev_set_drvdata(mdev, NULL);
- kfree(mdev_state->vconfig);
- kfree(mdev_state);
- ret = 0;
- break;
- }
- }
- mutex_unlock(&mdev_list_lock);
+ struct mdev_state *mdev_state;
+ int ret;
+
+ mdev_state = vfio_alloc_device(mdev_state, vdev, &mdev->dev,
+ &mtty_dev_ops);
+ if (IS_ERR(mdev_state))
+ return PTR_ERR(mdev_state);
+
+ ret = vfio_register_emulated_iommu_dev(&mdev_state->vdev);
+ if (ret)
+ goto err_put_vdev;
+ dev_set_drvdata(&mdev->dev, mdev_state);
+ return 0;
+err_put_vdev:
+ vfio_put_device(&mdev_state->vdev);
return ret;
}
-static int mtty_reset(struct mdev_device *mdev)
+static void mtty_release_dev(struct vfio_device *vdev)
{
- struct mdev_state *mdev_state;
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
- if (!mdev)
- return -EINVAL;
+ atomic_add(mdev_state->nr_ports, &mdev_avail_ports);
+ kfree(mdev_state->vconfig);
+ vfio_free_device(vdev);
+}
- mdev_state = mdev_get_drvdata(mdev);
- if (!mdev_state)
- return -EINVAL;
+static void mtty_remove(struct mdev_device *mdev)
+{
+ struct mdev_state *mdev_state = dev_get_drvdata(&mdev->dev);
+ vfio_unregister_group_dev(&mdev_state->vdev);
+ vfio_put_device(&mdev_state->vdev);
+}
+
+static int mtty_reset(struct mdev_state *mdev_state)
+{
pr_info("%s: called\n", __func__);
return 0;
}
-static ssize_t mtty_read(struct mdev_device *mdev, char __user *buf,
+static ssize_t mtty_read(struct vfio_device *vdev, char __user *buf,
size_t count, loff_t *ppos)
{
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
unsigned int done = 0;
int ret;
@@ -808,7 +816,7 @@ static ssize_t mtty_read(struct mdev_device *mdev, char __user *buf,
if (count >= 4 && !(*ppos % 4)) {
u32 val;
- ret = mdev_access(mdev, (u8 *)&val, sizeof(val),
+ ret = mdev_access(mdev_state, (u8 *)&val, sizeof(val),
*ppos, false);
if (ret <= 0)
goto read_err;
@@ -820,7 +828,7 @@ static ssize_t mtty_read(struct mdev_device *mdev, char __user *buf,
} else if (count >= 2 && !(*ppos % 2)) {
u16 val;
- ret = mdev_access(mdev, (u8 *)&val, sizeof(val),
+ ret = mdev_access(mdev_state, (u8 *)&val, sizeof(val),
*ppos, false);
if (ret <= 0)
goto read_err;
@@ -832,7 +840,7 @@ static ssize_t mtty_read(struct mdev_device *mdev, char __user *buf,
} else {
u8 val;
- ret = mdev_access(mdev, (u8 *)&val, sizeof(val),
+ ret = mdev_access(mdev_state, (u8 *)&val, sizeof(val),
*ppos, false);
if (ret <= 0)
goto read_err;
@@ -855,9 +863,11 @@ read_err:
return -EFAULT;
}
-static ssize_t mtty_write(struct mdev_device *mdev, const char __user *buf,
+static ssize_t mtty_write(struct vfio_device *vdev, const char __user *buf,
size_t count, loff_t *ppos)
{
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
unsigned int done = 0;
int ret;
@@ -870,7 +880,7 @@ static ssize_t mtty_write(struct mdev_device *mdev, const char __user *buf,
if (copy_from_user(&val, buf, sizeof(val)))
goto write_err;
- ret = mdev_access(mdev, (u8 *)&val, sizeof(val),
+ ret = mdev_access(mdev_state, (u8 *)&val, sizeof(val),
*ppos, true);
if (ret <= 0)
goto write_err;
@@ -882,7 +892,7 @@ static ssize_t mtty_write(struct mdev_device *mdev, const char __user *buf,
if (copy_from_user(&val, buf, sizeof(val)))
goto write_err;
- ret = mdev_access(mdev, (u8 *)&val, sizeof(val),
+ ret = mdev_access(mdev_state, (u8 *)&val, sizeof(val),
*ppos, true);
if (ret <= 0)
goto write_err;
@@ -894,7 +904,7 @@ static ssize_t mtty_write(struct mdev_device *mdev, const char __user *buf,
if (copy_from_user(&val, buf, sizeof(val)))
goto write_err;
- ret = mdev_access(mdev, (u8 *)&val, sizeof(val),
+ ret = mdev_access(mdev_state, (u8 *)&val, sizeof(val),
*ppos, true);
if (ret <= 0)
goto write_err;
@@ -912,19 +922,11 @@ write_err:
return -EFAULT;
}
-static int mtty_set_irqs(struct mdev_device *mdev, uint32_t flags,
+static int mtty_set_irqs(struct mdev_state *mdev_state, uint32_t flags,
unsigned int index, unsigned int start,
unsigned int count, void *data)
{
int ret = 0;
- struct mdev_state *mdev_state;
-
- if (!mdev)
- return -EINVAL;
-
- mdev_state = mdev_get_drvdata(mdev);
- if (!mdev_state)
- return -EINVAL;
mutex_lock(&mdev_state->ops_lock);
switch (index) {
@@ -1040,21 +1042,13 @@ static int mtty_trigger_interrupt(struct mdev_state *mdev_state)
return ret;
}
-static int mtty_get_region_info(struct mdev_device *mdev,
+static int mtty_get_region_info(struct mdev_state *mdev_state,
struct vfio_region_info *region_info,
u16 *cap_type_id, void **cap_type)
{
unsigned int size = 0;
- struct mdev_state *mdev_state;
u32 bar_index;
- if (!mdev)
- return -EINVAL;
-
- mdev_state = mdev_get_drvdata(mdev);
- if (!mdev_state)
- return -EINVAL;
-
bar_index = region_info->index;
if (bar_index >= VFIO_PCI_NUM_REGIONS)
return -EINVAL;
@@ -1089,8 +1083,7 @@ static int mtty_get_region_info(struct mdev_device *mdev,
return 0;
}
-static int mtty_get_irq_info(struct mdev_device *mdev,
- struct vfio_irq_info *irq_info)
+static int mtty_get_irq_info(struct vfio_irq_info *irq_info)
{
switch (irq_info->index) {
case VFIO_PCI_INTX_IRQ_INDEX:
@@ -1114,8 +1107,7 @@ static int mtty_get_irq_info(struct mdev_device *mdev,
return 0;
}
-static int mtty_get_device_info(struct mdev_device *mdev,
- struct vfio_device_info *dev_info)
+static int mtty_get_device_info(struct vfio_device_info *dev_info)
{
dev_info->flags = VFIO_DEVICE_FLAGS_PCI;
dev_info->num_regions = VFIO_PCI_NUM_REGIONS;
@@ -1124,19 +1116,13 @@ static int mtty_get_device_info(struct mdev_device *mdev,
return 0;
}
-static long mtty_ioctl(struct mdev_device *mdev, unsigned int cmd,
+static long mtty_ioctl(struct vfio_device *vdev, unsigned int cmd,
unsigned long arg)
{
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
int ret = 0;
unsigned long minsz;
- struct mdev_state *mdev_state;
-
- if (!mdev)
- return -EINVAL;
-
- mdev_state = mdev_get_drvdata(mdev);
- if (!mdev_state)
- return -ENODEV;
switch (cmd) {
case VFIO_DEVICE_GET_INFO:
@@ -1151,7 +1137,7 @@ static long mtty_ioctl(struct mdev_device *mdev, unsigned int cmd,
if (info.argsz < minsz)
return -EINVAL;
- ret = mtty_get_device_info(mdev, &info);
+ ret = mtty_get_device_info(&info);
if (ret)
return ret;
@@ -1176,7 +1162,7 @@ static long mtty_ioctl(struct mdev_device *mdev, unsigned int cmd,
if (info.argsz < minsz)
return -EINVAL;
- ret = mtty_get_region_info(mdev, &info, &cap_type_id,
+ ret = mtty_get_region_info(mdev_state, &info, &cap_type_id,
&cap_type);
if (ret)
return ret;
@@ -1200,7 +1186,7 @@ static long mtty_ioctl(struct mdev_device *mdev, unsigned int cmd,
(info.index >= mdev_state->dev_info.num_irqs))
return -EINVAL;
- ret = mtty_get_irq_info(mdev, &info);
+ ret = mtty_get_irq_info(&info);
if (ret)
return ret;
@@ -1234,61 +1220,23 @@ static long mtty_ioctl(struct mdev_device *mdev, unsigned int cmd,
return PTR_ERR(data);
}
- ret = mtty_set_irqs(mdev, hdr.flags, hdr.index, hdr.start,
+ ret = mtty_set_irqs(mdev_state, hdr.flags, hdr.index, hdr.start,
hdr.count, data);
kfree(ptr);
return ret;
}
case VFIO_DEVICE_RESET:
- return mtty_reset(mdev);
+ return mtty_reset(mdev_state);
}
return -ENOTTY;
}
-static int mtty_open(struct mdev_device *mdev)
-{
- pr_info("%s\n", __func__);
- return 0;
-}
-
-static void mtty_close(struct mdev_device *mdev)
-{
- pr_info("%s\n", __func__);
-}
-
-static ssize_t
-sample_mtty_dev_show(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- return sprintf(buf, "This is phy device\n");
-}
-
-static DEVICE_ATTR_RO(sample_mtty_dev);
-
-static struct attribute *mtty_dev_attrs[] = {
- &dev_attr_sample_mtty_dev.attr,
- NULL,
-};
-
-static const struct attribute_group mtty_dev_group = {
- .name = "mtty_dev",
- .attrs = mtty_dev_attrs,
-};
-
-static const struct attribute_group *mtty_dev_groups[] = {
- &mtty_dev_group,
- NULL,
-};
-
static ssize_t
sample_mdev_dev_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
- if (mdev_from_dev(dev))
- return sprintf(buf, "This is MDEV %s\n", dev_name(dev));
-
- return sprintf(buf, "\n");
+ return sprintf(buf, "This is MDEV %s\n", dev_name(dev));
}
static DEVICE_ATTR_RO(sample_mdev_dev);
@@ -1308,97 +1256,33 @@ static const struct attribute_group *mdev_dev_groups[] = {
NULL,
};
-static ssize_t
-name_show(struct kobject *kobj, struct device *dev, char *buf)
+static unsigned int mtty_get_available(struct mdev_type *mtype)
{
- char name[MTTY_STRING_LEN];
- int i;
- const char *name_str[2] = {"Single port serial", "Dual port serial"};
+ struct mtty_type *type = container_of(mtype, struct mtty_type, type);
- for (i = 0; i < 2; i++) {
- snprintf(name, MTTY_STRING_LEN, "%s-%d",
- dev_driver_string(dev), i + 1);
- if (!strcmp(kobj->name, name))
- return sprintf(buf, "%s\n", name_str[i]);
- }
-
- return -EINVAL;
+ return atomic_read(&mdev_avail_ports) / type->nr_ports;
}
-static MDEV_TYPE_ATTR_RO(name);
-
-static ssize_t
-available_instances_show(struct kobject *kobj, struct device *dev, char *buf)
-{
- char name[MTTY_STRING_LEN];
- int i;
- struct mdev_state *mds;
- int ports = 0, used = 0;
-
- for (i = 0; i < 2; i++) {
- snprintf(name, MTTY_STRING_LEN, "%s-%d",
- dev_driver_string(dev), i + 1);
- if (!strcmp(kobj->name, name)) {
- ports = i + 1;
- break;
- }
- }
-
- if (!ports)
- return -EINVAL;
-
- list_for_each_entry(mds, &mdev_devices_list, next)
- used += mds->nr_ports;
-
- return sprintf(buf, "%d\n", (MAX_MTTYS - used)/ports);
-}
-
-static MDEV_TYPE_ATTR_RO(available_instances);
-
-
-static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
- char *buf)
-{
- return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING);
-}
-
-static MDEV_TYPE_ATTR_RO(device_api);
-
-static struct attribute *mdev_types_attrs[] = {
- &mdev_type_attr_name.attr,
- &mdev_type_attr_device_api.attr,
- &mdev_type_attr_available_instances.attr,
- NULL,
+static const struct vfio_device_ops mtty_dev_ops = {
+ .name = "vfio-mtty",
+ .init = mtty_init_dev,
+ .release = mtty_release_dev,
+ .read = mtty_read,
+ .write = mtty_write,
+ .ioctl = mtty_ioctl,
};
-static struct attribute_group mdev_type_group1 = {
- .name = "1",
- .attrs = mdev_types_attrs,
-};
-
-static struct attribute_group mdev_type_group2 = {
- .name = "2",
- .attrs = mdev_types_attrs,
-};
-
-static struct attribute_group *mdev_type_groups[] = {
- &mdev_type_group1,
- &mdev_type_group2,
- NULL,
-};
-
-static const struct mdev_parent_ops mdev_fops = {
- .owner = THIS_MODULE,
- .dev_attr_groups = mtty_dev_groups,
- .mdev_attr_groups = mdev_dev_groups,
- .supported_type_groups = mdev_type_groups,
- .create = mtty_create,
- .remove = mtty_remove,
- .open = mtty_open,
- .release = mtty_close,
- .read = mtty_read,
- .write = mtty_write,
- .ioctl = mtty_ioctl,
+static struct mdev_driver mtty_driver = {
+ .device_api = VFIO_DEVICE_API_PCI_STRING,
+ .driver = {
+ .name = "mtty",
+ .owner = THIS_MODULE,
+ .mod_name = KBUILD_MODNAME,
+ .dev_groups = mdev_dev_groups,
+ },
+ .probe = mtty_probe,
+ .remove = mtty_remove,
+ .get_available = mtty_get_available,
};
static void mtty_device_release(struct device *dev)
@@ -1429,12 +1313,16 @@ static int __init mtty_dev_init(void)
pr_info("major_number:%d\n", MAJOR(mtty_dev.vd_devt));
+ ret = mdev_register_driver(&mtty_driver);
+ if (ret)
+ goto err_cdev;
+
mtty_dev.vd_class = class_create(THIS_MODULE, MTTY_CLASS_NAME);
if (IS_ERR(mtty_dev.vd_class)) {
pr_err("Error: failed to register mtty_dev class\n");
ret = PTR_ERR(mtty_dev.vd_class);
- goto failed1;
+ goto err_driver;
}
mtty_dev.dev.class = mtty_dev.vd_class;
@@ -1443,38 +1331,35 @@ static int __init mtty_dev_init(void)
ret = device_register(&mtty_dev.dev);
if (ret)
- goto failed2;
+ goto err_class;
- ret = mdev_register_device(&mtty_dev.dev, &mdev_fops);
+ ret = mdev_register_parent(&mtty_dev.parent, &mtty_dev.dev,
+ &mtty_driver, mtty_mdev_types,
+ ARRAY_SIZE(mtty_mdev_types));
if (ret)
- goto failed3;
-
- mutex_init(&mdev_list_lock);
- INIT_LIST_HEAD(&mdev_devices_list);
-
- goto all_done;
-
-failed3:
+ goto err_device;
+ return 0;
+err_device:
device_unregister(&mtty_dev.dev);
-failed2:
+err_class:
class_destroy(mtty_dev.vd_class);
-
-failed1:
+err_driver:
+ mdev_unregister_driver(&mtty_driver);
+err_cdev:
cdev_del(&mtty_dev.vd_cdev);
unregister_chrdev_region(mtty_dev.vd_devt, MINORMASK + 1);
-
-all_done:
return ret;
}
static void __exit mtty_dev_exit(void)
{
mtty_dev.dev.bus = NULL;
- mdev_unregister_device(&mtty_dev.dev);
+ mdev_unregister_parent(&mtty_dev.parent);
device_unregister(&mtty_dev.dev);
idr_destroy(&mtty_dev.vd_idr);
+ mdev_unregister_driver(&mtty_driver);
cdev_del(&mtty_dev.vd_cdev);
unregister_chrdev_region(mtty_dev.vd_devt, MINORMASK + 1);
class_destroy(mtty_dev.vd_class);
diff --git a/samples/vfs/.gitignore b/samples/vfs/.gitignore
index 0806eb0be62d..79212d91285b 100644
--- a/samples/vfs/.gitignore
+++ b/samples/vfs/.gitignore
@@ -1,2 +1,3 @@
-test-fsmount
-test-statx
+# SPDX-License-Identifier: GPL-2.0-only
+/test-fsmount
+/test-statx
diff --git a/samples/vfs/Makefile b/samples/vfs/Makefile
index 65acdde5c117..6377a678134a 100644
--- a/samples/vfs/Makefile
+++ b/samples/vfs/Makefile
@@ -1,10 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
-# List of programs to build
-hostprogs := \
- test-fsmount \
- test-statx
+userprogs-always-y += test-fsmount test-statx
-always-y := $(hostprogs)
-
-HOSTCFLAGS_test-fsmount.o += -I$(objtree)/usr/include
-HOSTCFLAGS_test-statx.o += -I$(objtree)/usr/include
+userccflags += -I usr/include
diff --git a/samples/vfs/test-statx.c b/samples/vfs/test-statx.c
index a3d68159fb51..49c7a46cee07 100644
--- a/samples/vfs/test-statx.c
+++ b/samples/vfs/test-statx.c
@@ -23,6 +23,8 @@
#include <linux/fcntl.h>
#define statx foo
#define statx_timestamp foo_timestamp
+struct statx;
+struct statx_timestamp;
#include <sys/stat.h>
#undef statx
#undef statx_timestamp
@@ -216,7 +218,7 @@ int main(int argc, char **argv)
struct statx stx;
int ret, raw = 0, atflag = AT_SYMLINK_NOFOLLOW;
- unsigned int mask = STATX_ALL;
+ unsigned int mask = STATX_BASIC_STATS | STATX_BTIME;
for (argv++; *argv; argv++) {
if (strcmp(*argv, "-F") == 0) {
diff --git a/samples/watch_queue/.gitignore b/samples/watch_queue/.gitignore
new file mode 100644
index 000000000000..823b351d3db9
--- /dev/null
+++ b/samples/watch_queue/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+/watch_test
diff --git a/samples/watch_queue/Makefile b/samples/watch_queue/Makefile
new file mode 100644
index 000000000000..c0db3a6bc524
--- /dev/null
+++ b/samples/watch_queue/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+userprogs-always-y += watch_test
+
+userccflags += -I usr/include
diff --git a/samples/watch_queue/watch_test.c b/samples/watch_queue/watch_test.c
new file mode 100644
index 000000000000..8c6cb57d5cfc
--- /dev/null
+++ b/samples/watch_queue/watch_test.c
@@ -0,0 +1,186 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Use watch_queue API to watch for notifications.
+ *
+ * Copyright (C) 2020 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#define _GNU_SOURCE
+#include <stdbool.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <limits.h>
+#include <linux/watch_queue.h>
+#include <linux/unistd.h>
+#include <linux/keyctl.h>
+
+#ifndef KEYCTL_WATCH_KEY
+#define KEYCTL_WATCH_KEY -1
+#endif
+#ifndef __NR_keyctl
+#define __NR_keyctl -1
+#endif
+
+#define BUF_SIZE 256
+
+static long keyctl_watch_key(int key, int watch_fd, int watch_id)
+{
+ return syscall(__NR_keyctl, KEYCTL_WATCH_KEY, key, watch_fd, watch_id);
+}
+
+static const char *key_subtypes[256] = {
+ [NOTIFY_KEY_INSTANTIATED] = "instantiated",
+ [NOTIFY_KEY_UPDATED] = "updated",
+ [NOTIFY_KEY_LINKED] = "linked",
+ [NOTIFY_KEY_UNLINKED] = "unlinked",
+ [NOTIFY_KEY_CLEARED] = "cleared",
+ [NOTIFY_KEY_REVOKED] = "revoked",
+ [NOTIFY_KEY_INVALIDATED] = "invalidated",
+ [NOTIFY_KEY_SETATTR] = "setattr",
+};
+
+static void saw_key_change(struct watch_notification *n, size_t len)
+{
+ struct key_notification *k = (struct key_notification *)n;
+
+ if (len != sizeof(struct key_notification)) {
+ fprintf(stderr, "Incorrect key message length\n");
+ return;
+ }
+
+ printf("KEY %08x change=%u[%s] aux=%u\n",
+ k->key_id, n->subtype, key_subtypes[n->subtype], k->aux);
+}
+
+/*
+ * Consume and display events.
+ */
+static void consumer(int fd)
+{
+ unsigned char buffer[433], *p, *end;
+ union {
+ struct watch_notification n;
+ unsigned char buf1[128];
+ } n;
+ ssize_t buf_len;
+
+ for (;;) {
+ buf_len = read(fd, buffer, sizeof(buffer));
+ if (buf_len == -1) {
+ perror("read");
+ exit(1);
+ }
+
+ if (buf_len == 0) {
+ printf("-- END --\n");
+ return;
+ }
+
+ if (buf_len > sizeof(buffer)) {
+ fprintf(stderr, "Read buffer overrun: %zd\n", buf_len);
+ return;
+ }
+
+ printf("read() = %zd\n", buf_len);
+
+ p = buffer;
+ end = buffer + buf_len;
+ while (p < end) {
+ size_t largest, len;
+
+ largest = end - p;
+ if (largest > 128)
+ largest = 128;
+ if (largest < sizeof(struct watch_notification)) {
+ fprintf(stderr, "Short message header: %zu\n", largest);
+ return;
+ }
+ memcpy(&n, p, largest);
+
+ printf("NOTIFY[%03zx]: ty=%06x sy=%02x i=%08x\n",
+ p - buffer, n.n.type, n.n.subtype, n.n.info);
+
+ len = n.n.info & WATCH_INFO_LENGTH;
+ if (len < sizeof(n.n) || len > largest) {
+ fprintf(stderr, "Bad message length: %zu/%zu\n", len, largest);
+ exit(1);
+ }
+
+ switch (n.n.type) {
+ case WATCH_TYPE_META:
+ switch (n.n.subtype) {
+ case WATCH_META_REMOVAL_NOTIFICATION:
+ printf("REMOVAL of watchpoint %08x\n",
+ (n.n.info & WATCH_INFO_ID) >>
+ WATCH_INFO_ID__SHIFT);
+ break;
+ case WATCH_META_LOSS_NOTIFICATION:
+ printf("-- LOSS --\n");
+ break;
+ default:
+ printf("other meta record\n");
+ break;
+ }
+ break;
+ case WATCH_TYPE_KEY_NOTIFY:
+ saw_key_change(&n.n, len);
+ break;
+ default:
+ printf("other type\n");
+ break;
+ }
+
+ p += len;
+ }
+ }
+}
+
+static struct watch_notification_filter filter = {
+ .nr_filters = 1,
+ .filters = {
+ [0] = {
+ .type = WATCH_TYPE_KEY_NOTIFY,
+ .subtype_filter[0] = UINT_MAX,
+ },
+ },
+};
+
+int main(int argc, char **argv)
+{
+ int pipefd[2], fd;
+
+ if (pipe2(pipefd, O_NOTIFICATION_PIPE) == -1) {
+ perror("pipe2");
+ exit(1);
+ }
+ fd = pipefd[0];
+
+ if (ioctl(fd, IOC_WATCH_QUEUE_SET_SIZE, BUF_SIZE) == -1) {
+ perror("watch_queue(size)");
+ exit(1);
+ }
+
+ if (ioctl(fd, IOC_WATCH_QUEUE_SET_FILTER, &filter) == -1) {
+ perror("watch_queue(filter)");
+ exit(1);
+ }
+
+ if (keyctl_watch_key(KEY_SPEC_SESSION_KEYRING, fd, 0x01) == -1) {
+ perror("keyctl");
+ exit(1);
+ }
+
+ if (keyctl_watch_key(KEY_SPEC_USER_KEYRING, fd, 0x02) == -1) {
+ perror("keyctl");
+ exit(1);
+ }
+
+ consumer(fd);
+ exit(0);
+}
diff --git a/samples/watchdog/.gitignore b/samples/watchdog/.gitignore
index ff0ebb540333..a70a0150ed9f 100644
--- a/samples/watchdog/.gitignore
+++ b/samples/watchdog/.gitignore
@@ -1 +1,2 @@
-watchdog-simple
+# SPDX-License-Identifier: GPL-2.0-only
+/watchdog-simple
diff --git a/samples/watchdog/Makefile b/samples/watchdog/Makefile
index a9430fa60253..ab39d23dc96b 100644
--- a/samples/watchdog/Makefile
+++ b/samples/watchdog/Makefile
@@ -1,9 +1,2 @@
# SPDX-License-Identifier: GPL-2.0
-CC := $(CROSS_COMPILE)gcc
-PROGS := watchdog-simple
-
-all: $(PROGS)
-
-clean:
- rm -fr $(PROGS)
-
+userprogs-always-y += watchdog-simple