Diffstat (limited to 'tools/testing')
41 files changed, 7094 insertions, 422 deletions
diff --git a/tools/testing/ktest/examples/vmware.conf b/tools/testing/ktest/examples/vmware.conf
new file mode 100644
index 000000000000..61958163d242
--- /dev/null
+++ b/tools/testing/ktest/examples/vmware.conf
@@ -0,0 +1,137 @@
+#
+# This config is an example usage of ktest.pl with a VMware guest
+#
+# VMware Setup:
+# -------------
+# - Edit the Virtual Machine ("Edit virtual machine settings")
+# - Add a Serial Port
+#   - You almost certainly want it set "Connect at power on"
+#   - Select "Use socket (named pipe)"
+#   - Select a name that you'll recognize, like 'ktestserialpipe'
+#   - From: Server
+#   - To: A Virtual Machine
+#   - Save
+# - Make sure you note the name; it will be in the base directory of the
+#   virtual machine (where the "disks" are stored). The default
+#   is /var/lib/vmware/<virtual machine name>/<the name you entered above>
+#
+# - Make note of the path to the VM
+# </End VMware setup>
+#
+# The guest is called 'Guest' and this would be something that
+# could be run on the host to test a virtual machine target.
+
+MACHINE = Guest
+
+# Name of the serial pipe you set in the VMware settings
+VMWARE_SERIAL_NAME = <the name you entered above>
+
+# Define a variable with the name of the VM
+# Note that this needs to be the name of the vmx file, and is usually also
+# the name of the directory that it's in. If the directory and name
+# differ, change VMWARE_VM_DIR accordingly.
+# Please omit the .vmx extension
+VMWARE_VM_NAME = <virtual machine name>
+
+# VM dir name. This is usually the same as the virtual machine's name,
+# but not always the case. Change it if they differ.
+VMWARE_VM_DIR = ${VMWARE_VM_NAME}
+
+# Base directory that the virtual machine is contained in
+# /var/lib/vmware is the default on Linux
+VMWARE_VM_BASE_DIR = /var/lib/vmware/${VMWARE_VM_DIR}
+
+# Use ncat to read the Unix pipe. Anything that can read the Unix pipe
+# and output its contents to stdout will work.
+CONSOLE = /usr/bin/ncat -U ${VMWARE_VM_BASE_DIR}/${VMWARE_SERIAL_NAME}
+
+# Define what version of Workstation you are using.
+# This is used by vmrun to pick the appropriate pieces to
+# test this. In all likelihood you want 'ws' or 'player'.
+# Valid options:
+#	ws - Workstation (Windows or Linux host)
+#	fusion - Fusion (Mac host)
+#	player - Using VMware Player (Windows or Linux host)
+# Note: vmrun has to run directly on the host machine
+VMWARE_HOST_TYPE = ws
+
+# VMware provides `vmrun` to allow you to do certain things to the virtual machine
+# This should hard reset the VM and force a boot
+VMWARE_POWER_CYCLE = /usr/bin/vmrun -T ${VMWARE_HOST_TYPE} reset ${VMWARE_VM_BASE_DIR}/${VMWARE_VM_NAME}.vmx nogui
+
+#*************************************#
+# This part is the same as test.conf  #
+#*************************************#
+
+# The include files will set up the type of test to run. Just set TEST to
+# which test you want to run.
+#
+# TESTS = patchcheck, randconfig, boot, test, config-bisect, bisect, min-config
+#
+# See the include/*.conf files that define these tests
+#
+TEST := patchcheck
+
+# Some tests may have more than one test to run. Define MULTI := 1 to run
+# the extra tests.
+MULTI := 0
+
+# In case you want to differentiate which type of system you are testing
+BITS := 64
+
+# REBOOT = none, error, fail, empty
+# See include/defaults.conf
+REBOOT := empty
+
+
+# The defaults file will set up various settings that can be used by all
+# machine configs.
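A note on the CONSOLE setting above: ktest.pl only requires a command that connects to the named pipe and relays it to stdout, and `ncat -U` is the convenient choice. For readers who want to see what that actually entails, a minimal C equivalent might look like the sketch below (the socket path is hardcoded for illustration; substitute ${VMWARE_VM_BASE_DIR}/${VMWARE_SERIAL_NAME}).

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/un.h>

/* Minimal stand-in for "ncat -U <pipe>": connect to the VMware serial
 * socket and copy everything it emits to stdout. */
int main(void)
{
	struct sockaddr_un addr = { .sun_family = AF_UNIX };
	char buf[4096];
	ssize_t n;
	int fd;

	/* Illustrative path, not part of the patch */
	strncpy(addr.sun_path, "/var/lib/vmware/Guest/ktestserialpipe",
		sizeof(addr.sun_path) - 1);

	fd = socket(AF_UNIX, SOCK_STREAM, 0);
	if (fd < 0 || connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
		perror("connect");
		return 1;
	}

	while ((n = read(fd, buf, sizeof(buf))) > 0)
		write(STDOUT_FILENO, buf, n);

	close(fd);
	return 0;
}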
+INCLUDE include/defaults.conf
+
+
+#*************************************#
+# Now we are different from test.conf #
+#*************************************#
+
+
+# The example here assumes that Guest is running a Fedora release
+# that uses dracut for its initramfs. The POST_INSTALL will be executed
+# after the install of the kernel and modules is complete.
+#
+POST_INSTALL = ${SSH} /sbin/dracut -f /boot/initramfs-test.img $KERNEL_VERSION
+
+# Guests sometimes get stuck on reboot. We wait 3 seconds after running
+# the reboot command and then do a full power-cycle of the guest.
+# This forces the guest to restart.
+#
+POWERCYCLE_AFTER_REBOOT = 3
+
+# We do the same after the halt command, but this time we wait 20 seconds.
+POWEROFF_AFTER_HALT = 20
+
+
+# As the defaults.conf file already defines a POWER_CYCLE option, and
+# options can not be defined in the same section more than once (all
+# DEFAULTS sections are considered the same), we use a DEFAULTS OVERRIDE
+# section to tell ktest.pl to ignore the previously defined options in
+# favor of the options set in the OVERRIDE section.
+#
+DEFAULTS OVERRIDE
+
+# Instead of using the default POWER_CYCLE option defined in
+# defaults.conf, we use vmrun to hard reset the guest.
+# Crude, but effective.
+#
+POWER_CYCLE = ${VMWARE_POWER_CYCLE}
+
+
+DEFAULTS
+
+# The following files each handle a different test case.
+# Having them included allows you to set up more than one machine and share
+# the same tests.
+INCLUDE include/patchcheck.conf
+INCLUDE include/tests.conf
+INCLUDE include/bisect.conf
+INCLUDE include/min-config.conf
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index 4e2450964517..09d1578f9d66 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -24,7 +24,7 @@ my %evals;
 
 #default opts
 my %default = (
-	"MAILER" => "sendmail", # default mailer
+	"MAILER" => "sendmail", # default mailer
	"EMAIL_ON_ERROR" => 1,
	"EMAIL_WHEN_FINISHED" => 1,
	"EMAIL_WHEN_CANCELED" => 0,
@@ -36,15 +36,15 @@ my %default = (
	"CLOSE_CONSOLE_SIGNAL" => "INT",
	"TIMEOUT" => 120,
	"TMP_DIR" => "/tmp/ktest/\${MACHINE}",
-	"SLEEP_TIME" => 60, # sleep time between tests
+	"SLEEP_TIME" => 60, # sleep time between tests
	"BUILD_NOCLEAN" => 0,
	"REBOOT_ON_ERROR" => 0,
	"POWEROFF_ON_ERROR" => 0,
	"REBOOT_ON_SUCCESS" => 1,
	"POWEROFF_ON_SUCCESS" => 0,
	"BUILD_OPTIONS" => "",
-	"BISECT_SLEEP_TIME" => 60, # sleep time between bisects
-	"PATCHCHECK_SLEEP_TIME" => 60, # sleep time between patch checks
+	"BISECT_SLEEP_TIME" => 60, # sleep time between bisects
+	"PATCHCHECK_SLEEP_TIME" => 60, # sleep time between patch checks
	"CLEAR_LOG" => 0,
	"BISECT_MANUAL" => 0,
	"BISECT_SKIP" => 1,
@@ -512,6 +512,69 @@ $config_help{"REBOOT_SCRIPT"} = << "EOF"
 EOF
     ;
+# used with process_expression()
+my $d = 0;
+
+# defined before get_test_name()
+my $in_die = 0;
+
+# defined before process_warning_line()
+my $check_build_re = ".*:.*(warning|error|Error):.*";
+my $utf8_quote = "\\x{e2}\\x{80}(\\x{98}|\\x{99})";
+
+# defined before child_finished()
+my $child_done;
+
+# config_ignore holds the configs that were set (or unset) for
+# a good config and we will ignore these configs for the rest
+# of a config bisect. These configs stay as they were.
+my %config_ignore;
+
+# config_set holds what all configs were set as.
+my %config_set;
+
+# config_off holds the set of configs that the bad config had disabled.
+# We need to record them and set them in the .config when running +# olddefconfig, because olddefconfig keeps the defaults. +my %config_off; + +# config_off_tmp holds a set of configs to turn off for now +my @config_off_tmp; + +# config_list is the set of configs that are being tested +my %config_list; +my %null_config; + +my %dependency; + +# found above run_config_bisect() +my $pass = 1; + +# found above add_dep() + +my %depends; +my %depcount; +my $iflevel = 0; +my @ifdeps; + +# prevent recursion +my %read_kconfigs; + +# found above test_this_config() +my %min_configs; +my %keep_configs; +my %save_configs; +my %processed_configs; +my %nochange_config; + +# +# These are first defined here, main function later on +# +sub run_command; +sub start_monitor; +sub end_monitor; +sub wait_for_monitor; + sub _logit { if (defined($opt{"LOG_FILE"})) { print LOG @_; @@ -537,7 +600,7 @@ sub read_prompt { my $ans; for (;;) { - if ($cancel) { + if ($cancel) { print "$prompt [y/n/C] "; } else { print "$prompt [Y/n] "; @@ -760,7 +823,7 @@ sub process_variables { # remove the space added in the beginning $retval =~ s/ //; - return "$retval" + return "$retval"; } sub set_value { @@ -863,7 +926,6 @@ sub value_defined { defined($opt{$2}); } -my $d = 0; sub process_expression { my ($name, $val) = @_; @@ -978,7 +1040,6 @@ sub __read_config { $override = 0; if ($type eq "TEST_START") { - if ($num_tests_set) { die "$name: $.: Can not specify both NUM_TESTS and TEST_START\n"; } @@ -1048,7 +1109,6 @@ sub __read_config { $test_num = $old_test_num; $repeat = $old_repeat; } - } elsif (/^\s*ELSE\b(.*)$/) { if (!$if) { die "$name: $.: ELSE found with out matching IF section\n$_"; @@ -1095,7 +1155,7 @@ sub __read_config { } } } - + if ( ! -r $file ) { die "$name: $.: Can't read file $file\n$_"; } @@ -1186,13 +1246,13 @@ sub __read_config { } sub get_test_case { - print "What test case would you like to run?\n"; - print " (build, install or boot)\n"; - print " Other tests are available but require editing ktest.conf\n"; - print " (see tools/testing/ktest/sample.conf)\n"; - my $ans = <STDIN>; - chomp $ans; - $default{"TEST_TYPE"} = $ans; + print "What test case would you like to run?\n"; + print " (build, install or boot)\n"; + print " Other tests are available but require editing ktest.conf\n"; + print " (see tools/testing/ktest/sample.conf)\n"; + my $ans = <STDIN>; + chomp $ans; + $default{"TEST_TYPE"} = $ans; } sub read_config { @@ -1368,11 +1428,6 @@ sub eval_option { return $option; } -sub run_command; -sub start_monitor; -sub end_monitor; -sub wait_for_monitor; - sub reboot { my ($time) = @_; my $powercycle = 0; @@ -1457,8 +1512,6 @@ sub do_not_reboot { ($test_type eq "config_bisect" && $opt{"CONFIG_BISECT_TYPE[$i]"} eq "build"); } -my $in_die = 0; - sub get_test_name() { my $name; @@ -1471,7 +1524,6 @@ sub get_test_name() { } sub dodie { - # avoid recursion return if ($in_die); $in_die = 1; @@ -1481,10 +1533,8 @@ sub dodie { doprint "CRITICAL FAILURE... 
[TEST $i] ", @_, "\n"; if ($reboot_on_error && !do_not_reboot) { - doprint "REBOOTING\n"; reboot_to_good; - } elsif ($poweroff_on_error && defined($power_off)) { doprint "POWERING OFF\n"; `$power_off`; @@ -1519,13 +1569,14 @@ sub dodie { close O; close L; } - send_email("KTEST: critical failure for test $i [$name]", - "Your test started at $script_start_time has failed with:\n@_\n", $log_file); + + send_email("KTEST: critical failure for test $i [$name]", + "Your test started at $script_start_time has failed with:\n@_\n", $log_file); } if ($monitor_cnt) { - # restore terminal settings - system("stty $stty_orig"); + # restore terminal settings + system("stty $stty_orig"); } if (defined($post_test)) { @@ -1709,81 +1760,81 @@ sub wait_for_monitor { } sub save_logs { - my ($result, $basedir) = @_; - my @t = localtime; - my $date = sprintf "%04d%02d%02d%02d%02d%02d", - 1900+$t[5],$t[4],$t[3],$t[2],$t[1],$t[0]; + my ($result, $basedir) = @_; + my @t = localtime; + my $date = sprintf "%04d%02d%02d%02d%02d%02d", + 1900+$t[5],$t[4],$t[3],$t[2],$t[1],$t[0]; - my $type = $build_type; - if ($type =~ /useconfig/) { - $type = "useconfig"; - } + my $type = $build_type; + if ($type =~ /useconfig/) { + $type = "useconfig"; + } - my $dir = "$machine-$test_type-$type-$result-$date"; + my $dir = "$machine-$test_type-$type-$result-$date"; - $dir = "$basedir/$dir"; + $dir = "$basedir/$dir"; - if (!-d $dir) { - mkpath($dir) or - dodie "can't create $dir"; - } + if (!-d $dir) { + mkpath($dir) or + dodie "can't create $dir"; + } - my %files = ( - "config" => $output_config, - "buildlog" => $buildlog, - "dmesg" => $dmesg, - "testlog" => $testlog, - ); + my %files = ( + "config" => $output_config, + "buildlog" => $buildlog, + "dmesg" => $dmesg, + "testlog" => $testlog, + ); - while (my ($name, $source) = each(%files)) { - if (-f "$source") { - cp "$source", "$dir/$name" or - dodie "failed to copy $source"; - } + while (my ($name, $source) = each(%files)) { + if (-f "$source") { + cp "$source", "$dir/$name" or + dodie "failed to copy $source"; } + } - doprint "*** Saved info to $dir ***\n"; + doprint "*** Saved info to $dir ***\n"; } sub fail { - if ($die_on_failure) { - dodie @_; - } + if ($die_on_failure) { + dodie @_; + } - doprint "FAILED\n"; + doprint "FAILED\n"; - my $i = $iteration; + my $i = $iteration; - # no need to reboot for just building. - if (!do_not_reboot) { - doprint "REBOOTING\n"; - reboot_to_good $sleep_time; - } + # no need to reboot for just building. 
+ if (!do_not_reboot) { + doprint "REBOOTING\n"; + reboot_to_good $sleep_time; + } - my $name = ""; + my $name = ""; - if (defined($test_name)) { - $name = " ($test_name)"; - } + if (defined($test_name)) { + $name = " ($test_name)"; + } - print_times; + print_times; - doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; - doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; - doprint "KTEST RESULT: TEST $i$name Failed: ", @_, "\n"; - doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; - doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; + doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; + doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; + doprint "KTEST RESULT: TEST $i$name Failed: ", @_, "\n"; + doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; + doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; - if (defined($store_failures)) { - save_logs "fail", $store_failures; - } + if (defined($store_failures)) { + save_logs "fail", $store_failures; + } - if (defined($post_test)) { - run_command $post_test; - } + if (defined($post_test)) { + run_command $post_test; + } - return 1; + return 1; } sub run_command { @@ -1915,8 +1966,8 @@ sub _get_grub_index { my ($command, $target, $skip) = @_; return if (defined($grub_number) && defined($last_grub_menu) && - $last_grub_menu eq $grub_menu && defined($last_machine) && - $last_machine eq $machine); + $last_grub_menu eq $grub_menu && defined($last_machine) && + $last_machine eq $machine); doprint "Find $reboot_type menu ... "; $grub_number = -1; @@ -1924,8 +1975,8 @@ sub _get_grub_index { my $ssh_grub = $ssh_exec; $ssh_grub =~ s,\$SSH_COMMAND,$command,g; - open(IN, "$ssh_grub |") - or dodie "unable to execute $command"; + open(IN, "$ssh_grub |") or + dodie "unable to execute $command"; my $found = 0; @@ -1969,9 +2020,9 @@ sub get_grub_index { $target = '^menuentry.*' . $grub_menu_qt; $skip = '^menuentry\s|^submenu\s'; } elsif ($reboot_type eq "grub2bls") { - $command = $grub_bls_get; - $target = '^title=.*' . $grub_menu_qt; - $skip = '^title='; + $command = $grub_bls_get; + $target = '^title=.*' . 
$grub_menu_qt; + $skip = '^title='; } else { return; } @@ -1979,8 +2030,7 @@ sub get_grub_index { _get_grub_index($command, $target, $skip); } -sub wait_for_input -{ +sub wait_for_input { my ($fp, $time) = @_; my $start_time; my $rin; @@ -2096,7 +2146,6 @@ sub monitor { my $version_found = 0; while (!$done) { - if ($bug && defined($stop_after_failure) && $stop_after_failure >= 0) { my $time = $stop_after_failure - (time - $failure_start); @@ -2349,9 +2398,6 @@ sub start_monitor_and_install { return monitor; } -my $check_build_re = ".*:.*(warning|error|Error):.*"; -my $utf8_quote = "\\x{e2}\\x{80}(\\x{98}|\\x{99})"; - sub process_warning_line { my ($line) = @_; @@ -2394,7 +2440,7 @@ sub check_buildlog { while (<IN>) { if (/$check_build_re/) { my $warning = process_warning_line $_; - + $warnings_list{$warning} = 1; } } @@ -2571,7 +2617,6 @@ sub build { run_command "mv $outputdir/config_temp $output_config" or dodie "moving config_temp"; } - } elsif (!$noclean) { unlink "$output_config"; run_command "$make mrproper" or @@ -2594,6 +2639,9 @@ sub build { # Run old config regardless, to enforce min configurations make_oldconfig; + if (not defined($build_options)){ + $build_options = ""; + } my $build_ret = run_command "$make $build_options", $buildlog; if (defined($post_build)) { @@ -2649,14 +2697,15 @@ sub success { print_times; - doprint "\n\n*******************************************\n"; - doprint "*******************************************\n"; - doprint "KTEST RESULT: TEST $i$name SUCCESS!!!! **\n"; - doprint "*******************************************\n"; - doprint "*******************************************\n"; + doprint "\n\n"; + doprint "*******************************************\n"; + doprint "*******************************************\n"; + doprint "KTEST RESULT: TEST $i$name SUCCESS!!!! **\n"; + doprint "*******************************************\n"; + doprint "*******************************************\n"; if (defined($store_successes)) { - save_logs "success", $store_successes; + save_logs "success", $store_successes; } if ($i != $opt{"NUM_TESTS"} && !do_not_reboot) { @@ -2698,8 +2747,6 @@ sub child_run_test { exit $run_command_status; } -my $child_done; - sub child_finished { $child_done = 1; } @@ -3031,7 +3078,6 @@ sub bisect { } if ($do_check) { - # get current HEAD my $head = get_sha1("HEAD"); @@ -3071,13 +3117,11 @@ sub bisect { run_command "git bisect replay $replay" or dodie "failed to run replay"; } else { - run_command "git bisect good $good" or dodie "could not set bisect good to $good"; run_git_bisect "git bisect bad $bad" or dodie "could not set bisect bad to $bad"; - } if (defined($start)) { @@ -3103,35 +3147,13 @@ sub bisect { success $i; } -# config_ignore holds the configs that were set (or unset) for -# a good config and we will ignore these configs for the rest -# of a config bisect. These configs stay as they were. -my %config_ignore; - -# config_set holds what all configs were set as. -my %config_set; - -# config_off holds the set of configs that the bad config had disabled. -# We need to record them and set them in the .config when running -# olddefconfig, because olddefconfig keeps the defaults. 
-my %config_off; - -# config_off_tmp holds a set of configs to turn off for now -my @config_off_tmp; - -# config_list is the set of configs that are being tested -my %config_list; -my %null_config; - -my %dependency; - sub assign_configs { my ($hash, $config) = @_; doprint "Reading configs from $config\n"; - open (IN, $config) - or dodie "Failed to read $config"; + open (IN, $config) or + dodie "Failed to read $config"; while (<IN>) { chomp; @@ -3219,8 +3241,6 @@ sub config_bisect_end { doprint "***************************************\n\n"; } -my $pass = 1; - sub run_config_bisect { my ($good, $bad, $last_result) = @_; my $reset = ""; @@ -3243,13 +3263,13 @@ sub run_config_bisect { $ret = run_config_bisect_test $config_bisect_type; if ($ret) { - doprint "NEW GOOD CONFIG ($pass)\n"; + doprint "NEW GOOD CONFIG ($pass)\n"; system("cp $output_config $tmpdir/good_config.tmp.$pass"); $pass++; # Return 3 for good config return 3; } else { - doprint "NEW BAD CONFIG ($pass)\n"; + doprint "NEW BAD CONFIG ($pass)\n"; system("cp $output_config $tmpdir/bad_config.tmp.$pass"); $pass++; # Return 4 for bad config @@ -3284,10 +3304,11 @@ sub config_bisect { if (!defined($config_bisect_exec)) { # First check the location that ktest.pl ran - my @locations = ( "$pwd/config-bisect.pl", - "$dirname/config-bisect.pl", - "$builddir/tools/testing/ktest/config-bisect.pl", - undef ); + my @locations = ( + "$pwd/config-bisect.pl", + "$dirname/config-bisect.pl", + "$builddir/tools/testing/ktest/config-bisect.pl", + undef ); foreach my $loc (@locations) { doprint "loc = $loc\n"; $config_bisect_exec = $loc; @@ -3368,7 +3389,7 @@ sub config_bisect { } while ($ret == 3 || $ret == 4); if ($ret == 2) { - config_bisect_end "$good_config.tmp", "$bad_config.tmp"; + config_bisect_end "$good_config.tmp", "$bad_config.tmp"; } return $ret if ($ret < 0); @@ -3511,14 +3532,6 @@ sub patchcheck { return 1; } -my %depends; -my %depcount; -my $iflevel = 0; -my @ifdeps; - -# prevent recursion -my %read_kconfigs; - sub add_dep { # $config depends on $dep my ($config, $dep) = @_; @@ -3548,7 +3561,6 @@ sub read_kconfig { my $cont = 0; my $line; - if (! -f $kconfig) { doprint "file $kconfig does not exist, skipping\n"; return; @@ -3630,8 +3642,8 @@ sub read_kconfig { sub read_depends { # find out which arch this is by the kconfig file - open (IN, $output_config) - or dodie "Failed to read $output_config"; + open (IN, $output_config) or + dodie "Failed to read $output_config"; my $arch; while (<IN>) { if (m,Linux/(\S+)\s+\S+\s+Kernel Configuration,) { @@ -3657,7 +3669,7 @@ sub read_depends { if (! -f $kconfig && $arch =~ /\d$/) { my $orig = $arch; - # some subarchs have numbers, truncate them + # some subarchs have numbers, truncate them $arch =~ s/\d*$//; $kconfig = "$builddir/arch/$arch/Kconfig"; if (! -f $kconfig) { @@ -3706,7 +3718,6 @@ sub get_depends { my @configs; while ($dep =~ /[$valid]/) { - if ($dep =~ /^[^$valid]*([$valid]+)/) { my $conf = "CONFIG_" . 
$1; @@ -3721,12 +3732,6 @@ sub get_depends { return @configs; } -my %min_configs; -my %keep_configs; -my %save_configs; -my %processed_configs; -my %nochange_config; - sub test_this_config { my ($config) = @_; @@ -3852,7 +3857,7 @@ sub make_min_config { foreach my $config (@config_keys) { my $kconfig = chomp_config $config; if (!defined $depcount{$kconfig}) { - $depcount{$kconfig} = 0; + $depcount{$kconfig} = 0; } } @@ -3887,7 +3892,6 @@ sub make_min_config { my $take_two = 0; while (!$done) { - my $config; my $found; @@ -3898,7 +3902,7 @@ sub make_min_config { # Sort keys by who is most dependent on @test_configs = sort { $depcount{chomp_config($b)} <=> $depcount{chomp_config($a)} } - @test_configs ; + @test_configs ; # Put configs that did not modify the config at the end. my $reset = 1; @@ -3954,13 +3958,13 @@ sub make_min_config { my $failed = 0; build "oldconfig" or $failed = 1; if (!$failed) { - start_monitor_and_install or $failed = 1; + start_monitor_and_install or $failed = 1; - if ($type eq "test" && !$failed) { - do_run_test or $failed = 1; - } + if ($type eq "test" && !$failed) { + do_run_test or $failed = 1; + } - end_monitor; + end_monitor; } $in_bisect = 0; @@ -3974,8 +3978,8 @@ sub make_min_config { # update new ignore configs if (defined($ignore_config)) { - open (OUT, ">$temp_config") - or dodie "Can't write to $temp_config"; + open (OUT, ">$temp_config") or + dodie "Can't write to $temp_config"; foreach my $config (keys %save_configs) { print OUT "$save_configs{$config}\n"; } @@ -4002,8 +4006,8 @@ sub make_min_config { } # Save off all the current mandatory configs - open (OUT, ">$temp_config") - or dodie "Can't write to $temp_config"; + open (OUT, ">$temp_config") or + dodie "Can't write to $temp_config"; foreach my $config (keys %keep_configs) { print OUT "$keep_configs{$config}\n"; } @@ -4041,7 +4045,6 @@ sub make_warnings_file { open(IN, $buildlog) or dodie "Can't open $buildlog"; while (<IN>) { - # Some compilers use UTF-8 extended for quotes # for distcc heterogeneous systems, this causes issues s/$utf8_quote/'/g; @@ -4057,98 +4060,6 @@ sub make_warnings_file { success $i; } -$#ARGV < 1 or die "ktest.pl version: $VERSION\n usage: ktest.pl [config-file]\n"; - -if ($#ARGV == 0) { - $ktest_config = $ARGV[0]; - if (! -f $ktest_config) { - print "$ktest_config does not exist.\n"; - if (!read_yn "Create it?") { - exit 0; - } - } -} - -if (! -f $ktest_config) { - $newconfig = 1; - get_test_case; - open(OUT, ">$ktest_config") or die "Can not create $ktest_config"; - print OUT << "EOF" -# Generated by ktest.pl -# - -# PWD is a ktest.pl variable that will result in the process working -# directory that ktest.pl is executed in. - -# THIS_DIR is automatically assigned the PWD of the path that generated -# the config file. It is best to use this variable when assigning other -# directory paths within this directory. This allows you to easily -# move the test cases to other locations or to other machines. -# -THIS_DIR := $variable{"PWD"} - -# Define each test with TEST_START -# The config options below it will override the defaults -TEST_START -TEST_TYPE = $default{"TEST_TYPE"} - -DEFAULTS -EOF -; - close(OUT); -} -read_config $ktest_config; - -if (defined($opt{"LOG_FILE"})) { - $opt{"LOG_FILE"} = eval_option("LOG_FILE", $opt{"LOG_FILE"}, -1); -} - -# Append any configs entered in manually to the config file. 
-my @new_configs = keys %entered_configs; -if ($#new_configs >= 0) { - print "\nAppending entered in configs to $ktest_config\n"; - open(OUT, ">>$ktest_config") or die "Can not append to $ktest_config"; - foreach my $config (@new_configs) { - print OUT "$config = $entered_configs{$config}\n"; - $opt{$config} = process_variables($entered_configs{$config}); - } -} - -if (defined($opt{"LOG_FILE"})) { - if ($opt{"CLEAR_LOG"}) { - unlink $opt{"LOG_FILE"}; - } - open(LOG, ">> $opt{LOG_FILE}") or die "Can't write to $opt{LOG_FILE}"; - LOG->autoflush(1); -} - -doprint "\n\nSTARTING AUTOMATED TESTS\n\n"; - -for (my $i = 0, my $repeat = 1; $i <= $opt{"NUM_TESTS"}; $i += $repeat) { - - if (!$i) { - doprint "DEFAULT OPTIONS:\n"; - } else { - doprint "\nTEST $i OPTIONS"; - if (defined($repeat_tests{$i})) { - $repeat = $repeat_tests{$i}; - doprint " ITERATE $repeat"; - } - doprint "\n"; - } - - foreach my $option (sort keys %opt) { - - if ($option =~ /\[(\d+)\]$/) { - next if ($i != $1); - } else { - next if ($i); - } - - doprint "$option = $opt{$option}\n"; - } -} - sub option_defined { my ($option) = @_; @@ -4261,7 +4172,6 @@ sub do_send_mail { } sub send_email { - if (defined($mailto)) { if (!defined($mailer)) { doprint "No email sent: email or mailer not specified in config.\n"; @@ -4274,12 +4184,103 @@ sub send_email { sub cancel_test { if ($email_when_canceled) { my $name = get_test_name; - send_email("KTEST: Your [$name] test was cancelled", - "Your test started at $script_start_time was cancelled: sig int"); + send_email("KTEST: Your [$name] test was cancelled", + "Your test started at $script_start_time was cancelled: sig int"); } die "\nCaught Sig Int, test interrupted: $!\n" } +$#ARGV < 1 or die "ktest.pl version: $VERSION\n usage: ktest.pl [config-file]\n"; + +if ($#ARGV == 0) { + $ktest_config = $ARGV[0]; + if (! -f $ktest_config) { + print "$ktest_config does not exist.\n"; + if (!read_yn "Create it?") { + exit 0; + } + } +} + +if (! -f $ktest_config) { + $newconfig = 1; + get_test_case; + open(OUT, ">$ktest_config") or die "Can not create $ktest_config"; + print OUT << "EOF" +# Generated by ktest.pl +# + +# PWD is a ktest.pl variable that will result in the process working +# directory that ktest.pl is executed in. + +# THIS_DIR is automatically assigned the PWD of the path that generated +# the config file. It is best to use this variable when assigning other +# directory paths within this directory. This allows you to easily +# move the test cases to other locations or to other machines. +# +THIS_DIR := $variable{"PWD"} + +# Define each test with TEST_START +# The config options below it will override the defaults +TEST_START +TEST_TYPE = $default{"TEST_TYPE"} + +DEFAULTS +EOF +; + close(OUT); +} +read_config $ktest_config; + +if (defined($opt{"LOG_FILE"})) { + $opt{"LOG_FILE"} = eval_option("LOG_FILE", $opt{"LOG_FILE"}, -1); +} + +# Append any configs entered in manually to the config file. 
+my @new_configs = keys %entered_configs; +if ($#new_configs >= 0) { + print "\nAppending entered in configs to $ktest_config\n"; + open(OUT, ">>$ktest_config") or die "Can not append to $ktest_config"; + foreach my $config (@new_configs) { + print OUT "$config = $entered_configs{$config}\n"; + $opt{$config} = process_variables($entered_configs{$config}); + } +} + +if (defined($opt{"LOG_FILE"})) { + if ($opt{"CLEAR_LOG"}) { + unlink $opt{"LOG_FILE"}; + } + open(LOG, ">> $opt{LOG_FILE}") or die "Can't write to $opt{LOG_FILE}"; + LOG->autoflush(1); +} + +doprint "\n\nSTARTING AUTOMATED TESTS\n\n"; + +for (my $i = 0, my $repeat = 1; $i <= $opt{"NUM_TESTS"}; $i += $repeat) { + + if (!$i) { + doprint "DEFAULT OPTIONS:\n"; + } else { + doprint "\nTEST $i OPTIONS"; + if (defined($repeat_tests{$i})) { + $repeat = $repeat_tests{$i}; + doprint " ITERATE $repeat"; + } + doprint "\n"; + } + + foreach my $option (sort keys %opt) { + if ($option =~ /\[(\d+)\]$/) { + next if ($i != $1); + } else { + next if ($i); + } + + doprint "$option = $opt{$option}\n"; + } +} + $SIG{INT} = qw(cancel_test); # First we need to do is the builds @@ -4323,15 +4324,15 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { # The first test may override the PRE_KTEST option if ($i == 1) { - if (defined($pre_ktest)) { - doprint "\n"; - run_command $pre_ktest; - } - if ($email_when_started) { + if (defined($pre_ktest)) { + doprint "\n"; + run_command $pre_ktest; + } + if ($email_when_started) { my $name = get_test_name; - send_email("KTEST: Your [$name] test was started", - "Your test was started on $script_start_time"); - } + send_email("KTEST: Your [$name] test was started", + "Your test was started on $script_start_time"); + } } # Any test can override the POST_KTEST option @@ -4409,7 +4410,7 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { my $ret = run_command $pre_test; if (!$ret && defined($pre_test_die) && $pre_test_die) { - dodie "failed to pre_test\n"; + dodie "failed to pre_test\n"; } } @@ -4503,12 +4504,11 @@ if ($opt{"POWEROFF_ON_SUCCESS"}) { run_command $switch_to_good; } - doprint "\n $successes of $opt{NUM_TESTS} tests were successful\n\n"; if ($email_when_finished) { send_email("KTEST: Your test has finished!", - "$successes of $opt{NUM_TESTS} tests started at $script_start_time were successful!"); + "$successes of $opt{NUM_TESTS} tests started at $script_start_time were successful!"); } if (defined($opt{"LOG_FILE"})) { @@ -4517,3 +4517,12 @@ if (defined($opt{"LOG_FILE"})) { } exit 0; + +## +# The following are here to standardize tabs/spaces/etc across the most likely editors +### + +# Local Variables: +# mode: perl +# End: +# vim: softtabstop=4 diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 6c575cf34a71..bc3299a20338 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -25,6 +25,7 @@ TARGETS += ir TARGETS += kcmp TARGETS += kexec TARGETS += kvm +TARGETS += landlock TARGETS += lib TARGETS += livepatch TARGETS += lkdtm diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c index fb23ce9617ea..485dff51bad2 100644 --- a/tools/testing/selftests/dma/dma_map_benchmark.c +++ b/tools/testing/selftests/dma/dma_map_benchmark.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020 Hisilicon Limited. + * Copyright (C) 2020 HiSilicon Limited. 
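An aside on the struct map_benchmark change a few lines below: the new __u32 granule field is carved out of the reserved expansion[] tail (80 bytes shrinking to 76), so the size of the user-kernel ioctl ABI struct is unchanged. That invariant can be checked at compile time; a sketch (only the affected tail of the struct is modeled here, which is an assumption of this illustration):

#include <linux/types.h>

/* The new field must come out of the reserved space, keeping the ABI
 * struct size stable across kernel and userspace builds. */
struct tail_before { __u32 dma_trans_ns; __u8 expansion[80]; };
struct tail_after  { __u32 dma_trans_ns; __u32 granule; __u8 expansion[76]; };

_Static_assert(sizeof(struct tail_before) == sizeof(struct tail_after),
	       "new fields must be carved out of the reserved tail");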
*/ #include <fcntl.h> @@ -40,7 +40,8 @@ struct map_benchmark { __u32 dma_bits; /* DMA addressing capability */ __u32 dma_dir; /* DMA data direction */ __u32 dma_trans_ns; /* time for DMA transmission in ns */ - __u8 expansion[80]; /* For future use */ + __u32 granule; /* how many PAGE_SIZE will do map/unmap once a time */ + __u8 expansion[76]; /* For future use */ }; int main(int argc, char **argv) @@ -51,11 +52,13 @@ int main(int argc, char **argv) int threads = 1, seconds = 20, node = -1; /* default dma mask 32bit, bidirectional DMA */ int bits = 32, xdelay = 0, dir = DMA_MAP_BIDIRECTIONAL; + /* default granule 1 PAGESIZE */ + int granule = 1; int cmd = DMA_MAP_BENCHMARK; char *p; - while ((opt = getopt(argc, argv, "t:s:n:b:d:x:")) != -1) { + while ((opt = getopt(argc, argv, "t:s:n:b:d:x:g:")) != -1) { switch (opt) { case 't': threads = atoi(optarg); @@ -75,6 +78,9 @@ int main(int argc, char **argv) case 'x': xdelay = atoi(optarg); break; + case 'g': + granule = atoi(optarg); + break; default: return -1; } @@ -110,6 +116,11 @@ int main(int argc, char **argv) exit(1); } + if (granule < 1 || granule > 1024) { + fprintf(stderr, "invalid granule size\n"); + exit(1); + } + fd = open("/sys/kernel/debug/dma_map_benchmark", O_RDWR); if (fd == -1) { perror("open"); @@ -123,14 +134,15 @@ int main(int argc, char **argv) map.dma_bits = bits; map.dma_dir = dir; map.dma_trans_ns = xdelay; + map.granule = granule; if (ioctl(fd, cmd, &map)) { perror("ioctl"); exit(1); } - printf("dma mapping benchmark: threads:%d seconds:%d node:%d dir:%s\n", - threads, seconds, node, dir[directions]); + printf("dma mapping benchmark: threads:%d seconds:%d node:%d dir:%s granule: %d\n", + threads, seconds, node, dir[directions], granule); printf("average map latency(us):%.1f standard deviation:%.1f\n", map.avg_map_100ns/10.0, map.map_stddev/10.0); printf("average unmap latency(us):%.1f standard deviation:%.1f\n", diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 7bd7e776c266..bd83158e0e0b 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only /aarch64/get-reg-list /aarch64/get-reg-list-sve +/aarch64/vgic_init /s390x/memop /s390x/resets /s390x/sync_regs_test @@ -38,6 +39,7 @@ /dirty_log_perf_test /hardware_disable_test /kvm_create_max_vcpus +/kvm_page_table_test /memslot_modification_stress_test /set_memory_region_test /steal_time diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index cb95b5bace7b..e439d027939d 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -72,16 +72,19 @@ TEST_GEN_PROGS_x86_64 += dirty_log_test TEST_GEN_PROGS_x86_64 += dirty_log_perf_test TEST_GEN_PROGS_x86_64 += hardware_disable_test TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus +TEST_GEN_PROGS_x86_64 += kvm_page_table_test TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test TEST_GEN_PROGS_x86_64 += set_memory_region_test TEST_GEN_PROGS_x86_64 += steal_time TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list-sve +TEST_GEN_PROGS_aarch64 += aarch64/vgic_init TEST_GEN_PROGS_aarch64 += demand_paging_test TEST_GEN_PROGS_aarch64 += dirty_log_test TEST_GEN_PROGS_aarch64 += dirty_log_perf_test TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus +TEST_GEN_PROGS_aarch64 += kvm_page_table_test TEST_GEN_PROGS_aarch64 += set_memory_region_test TEST_GEN_PROGS_aarch64 += steal_time @@ -91,6 
+94,7 @@ TEST_GEN_PROGS_s390x += s390x/sync_regs_test TEST_GEN_PROGS_s390x += demand_paging_test TEST_GEN_PROGS_s390x += dirty_log_test TEST_GEN_PROGS_s390x += kvm_create_max_vcpus +TEST_GEN_PROGS_s390x += kvm_page_table_test TEST_GEN_PROGS_s390x += set_memory_region_test TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M)) diff --git a/tools/testing/selftests/kvm/aarch64/vgic_init.c b/tools/testing/selftests/kvm/aarch64/vgic_init.c new file mode 100644 index 000000000000..623f31a14326 --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/vgic_init.c @@ -0,0 +1,551 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * vgic init sequence tests + * + * Copyright (C) 2020, Red Hat, Inc. + */ +#define _GNU_SOURCE +#include <linux/kernel.h> +#include <sys/syscall.h> +#include <asm/kvm.h> +#include <asm/kvm_para.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +#define NR_VCPUS 4 + +#define REDIST_REGION_ATTR_ADDR(count, base, flags, index) (((uint64_t)(count) << 52) | \ + ((uint64_t)((base) >> 16) << 16) | ((uint64_t)(flags) << 12) | index) +#define REG_OFFSET(vcpu, offset) (((uint64_t)vcpu << 32) | offset) + +#define GICR_TYPER 0x8 + +struct vm_gic { + struct kvm_vm *vm; + int gic_fd; +}; + +static int max_ipa_bits; + +/* helper to access a redistributor register */ +static int access_redist_reg(int gicv3_fd, int vcpu, int offset, + uint32_t *val, bool write) +{ + uint64_t attr = REG_OFFSET(vcpu, offset); + + return _kvm_device_access(gicv3_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS, + attr, val, write); +} + +/* dummy guest code */ +static void guest_code(void) +{ + GUEST_SYNC(0); + GUEST_SYNC(1); + GUEST_SYNC(2); + GUEST_DONE(); +} + +/* we don't want to assert on run execution, hence that helper */ +static int run_vcpu(struct kvm_vm *vm, uint32_t vcpuid) +{ + ucall_init(vm, NULL); + int ret = _vcpu_ioctl(vm, vcpuid, KVM_RUN, NULL); + if (ret) + return -errno; + return 0; +} + +static struct vm_gic vm_gic_create(void) +{ + struct vm_gic v; + + v.vm = vm_create_default_with_vcpus(NR_VCPUS, 0, 0, guest_code, NULL); + v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, false); + + return v; +} + +static void vm_gic_destroy(struct vm_gic *v) +{ + close(v->gic_fd); + kvm_vm_free(v->vm); +} + +/** + * Helper routine that performs KVM device tests in general and + * especially ARM_VGIC_V3 ones. 
Eventually the ARM_VGIC_V3 + * device gets created, a legacy RDIST region is set at @0x0 + * and a DIST region is set @0x60000 + */ +static void subtest_dist_rdist(struct vm_gic *v) +{ + int ret; + uint64_t addr; + + /* Check existing group/attributes */ + kvm_device_check_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_DIST); + + kvm_device_check_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST); + + /* check non existing attribute */ + ret = _kvm_device_check_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, 0); + TEST_ASSERT(ret && errno == ENXIO, "attribute not supported"); + + /* misaligned DIST and REDIST address settings */ + addr = 0x1000; + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_DIST, &addr, true); + TEST_ASSERT(ret && errno == EINVAL, "GICv3 dist base not 64kB aligned"); + + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true); + TEST_ASSERT(ret && errno == EINVAL, "GICv3 redist base not 64kB aligned"); + + /* out of range address */ + if (max_ipa_bits) { + addr = 1ULL << max_ipa_bits; + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_DIST, &addr, true); + TEST_ASSERT(ret && errno == E2BIG, "dist address beyond IPA limit"); + + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true); + TEST_ASSERT(ret && errno == E2BIG, "redist address beyond IPA limit"); + } + + /* set REDIST base address @0x0*/ + addr = 0x00000; + kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true); + + /* Attempt to create a second legacy redistributor region */ + addr = 0xE0000; + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true); + TEST_ASSERT(ret && errno == EEXIST, "GICv3 redist base set again"); + + /* Attempt to mix legacy and new redistributor regions */ + addr = REDIST_REGION_ATTR_ADDR(NR_VCPUS, 0x100000, 0, 0); + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + TEST_ASSERT(ret && errno == EINVAL, "attempt to mix GICv3 REDIST and REDIST_REGION"); + + /* + * Set overlapping DIST / REDIST, cannot be detected here. Will be detected + * on first vcpu run instead. 
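Two encodings recur throughout vgic_init.c and are easier to follow spelled out: the REDIST_REGION_ATTR_ADDR() packing defined near the top of the file, and the GICR_TYPER values asserted further down (0x310, 0x100, 0x210, and so on). The helpers below are an editorial sketch derived from that macro and from the asserted values; they are not part of the patch.

#include <stdbool.h>
#include <stdint.h>

/* Inverse of REDIST_REGION_ATTR_ADDR(): redistributor count in bits
 * [63:52], the 64kB-aligned base address in bits [51:16], flags in
 * bits [15:12], and the region index in bits [11:0]. */
static void redist_region_unpack(uint64_t attr, uint64_t *count,
				 uint64_t *base, uint64_t *flags,
				 uint64_t *index)
{
	*count = attr >> 52;
	*base  = ((attr >> 16) & ((1ULL << 36) - 1)) << 16;
	*flags = (attr >> 12) & 0xf;
	*index = attr & 0xfff;
}

/* Expected lower word of GICR_TYPER as these tests assert it:
 * Processor_Number in bits [23:8], the "Last" redistributor flag in
 * bit 4. E.g. vCPU 3 placed last in its region reads back 0x310. */
static uint32_t expected_gicr_typer(uint32_t vcpu_id, bool last)
{
	return (vcpu_id << 8) | (last ? 0x10 : 0x0);
}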
+ */ + addr = 3 * 2 * 0x10000; + kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, KVM_VGIC_V3_ADDR_TYPE_DIST, + &addr, true); +} + +/* Test the new REDIST region API */ +static void subtest_redist_regions(struct vm_gic *v) +{ + uint64_t addr, expected_addr; + int ret; + + ret = kvm_device_check_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST); + TEST_ASSERT(!ret, "Multiple redist regions advertised"); + + addr = REDIST_REGION_ATTR_ADDR(NR_VCPUS, 0x100000, 2, 0); + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + TEST_ASSERT(ret && errno == EINVAL, "redist region attr value with flags != 0"); + + addr = REDIST_REGION_ATTR_ADDR(0, 0x100000, 0, 0); + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + TEST_ASSERT(ret && errno == EINVAL, "redist region attr value with count== 0"); + + addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 1); + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + TEST_ASSERT(ret && errno == EINVAL, + "attempt to register the first rdist region with index != 0"); + + addr = REDIST_REGION_ATTR_ADDR(2, 0x201000, 0, 1); + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + TEST_ASSERT(ret && errno == EINVAL, "rdist region with misaligned address"); + + addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0); + kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + + addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 1); + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + TEST_ASSERT(ret && errno == EINVAL, "register an rdist region with already used index"); + + addr = REDIST_REGION_ATTR_ADDR(1, 0x210000, 0, 2); + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + TEST_ASSERT(ret && errno == EINVAL, + "register an rdist region overlapping with another one"); + + addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 2); + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + TEST_ASSERT(ret && errno == EINVAL, "register redist region with index not +1"); + + addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 1); + kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + + addr = REDIST_REGION_ATTR_ADDR(1, 1ULL << max_ipa_bits, 0, 2); + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + TEST_ASSERT(ret && errno == E2BIG, + "register redist region with base address beyond IPA range"); + + addr = 0x260000; + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true); + TEST_ASSERT(ret && errno == EINVAL, + "Mix KVM_VGIC_V3_ADDR_TYPE_REDIST and REDIST_REGION"); + + /* + * Now there are 2 redist regions: + * region 0 @ 0x200000 2 redists + * region 1 @ 0x240000 1 redist + * Attempt to read their characteristics + */ + + addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 0); + expected_addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0); + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, false); + TEST_ASSERT(!ret && addr == expected_addr, 
"read characteristics of region #0"); + + addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 1); + expected_addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 1); + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, false); + TEST_ASSERT(!ret && addr == expected_addr, "read characteristics of region #1"); + + addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 2); + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, false); + TEST_ASSERT(ret && errno == ENOENT, "read characteristics of non existing region"); + + addr = 0x260000; + kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_DIST, &addr, true); + + addr = REDIST_REGION_ATTR_ADDR(1, 0x260000, 0, 2); + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + TEST_ASSERT(ret && errno == EINVAL, "register redist region colliding with dist"); +} + +/* + * VGIC KVM device is created and initialized before the secondary CPUs + * get created + */ +static void test_vgic_then_vcpus(void) +{ + struct vm_gic v; + int ret, i; + + v.vm = vm_create_default(0, 0, guest_code); + v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, false); + + subtest_dist_rdist(&v); + + /* Add the rest of the VCPUs */ + for (i = 1; i < NR_VCPUS; ++i) + vm_vcpu_add_default(v.vm, i, guest_code); + + ret = run_vcpu(v.vm, 3); + TEST_ASSERT(ret == -EINVAL, "dist/rdist overlap detected on 1st vcpu run"); + + vm_gic_destroy(&v); +} + +/* All the VCPUs are created before the VGIC KVM device gets initialized */ +static void test_vcpus_then_vgic(void) +{ + struct vm_gic v; + int ret; + + v = vm_gic_create(); + + subtest_dist_rdist(&v); + + ret = run_vcpu(v.vm, 3); + TEST_ASSERT(ret == -EINVAL, "dist/rdist overlap detected on 1st vcpu run"); + + vm_gic_destroy(&v); +} + +static void test_new_redist_regions(void) +{ + void *dummy = NULL; + struct vm_gic v; + uint64_t addr; + int ret; + + v = vm_gic_create(); + subtest_redist_regions(&v); + kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true); + + ret = run_vcpu(v.vm, 3); + TEST_ASSERT(ret == -ENXIO, "running without sufficient number of rdists"); + vm_gic_destroy(&v); + + /* step2 */ + + v = vm_gic_create(); + subtest_redist_regions(&v); + + addr = REDIST_REGION_ATTR_ADDR(1, 0x280000, 0, 2); + kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + + ret = run_vcpu(v.vm, 3); + TEST_ASSERT(ret == -EBUSY, "running without vgic explicit init"); + + vm_gic_destroy(&v); + + /* step 3 */ + + v = vm_gic_create(); + subtest_redist_regions(&v); + + _kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, dummy, true); + TEST_ASSERT(ret && errno == EFAULT, + "register a third region allowing to cover the 4 vcpus"); + + addr = REDIST_REGION_ATTR_ADDR(1, 0x280000, 0, 2); + kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + + kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true); + + ret = run_vcpu(v.vm, 3); + TEST_ASSERT(!ret, "vcpu run"); + + vm_gic_destroy(&v); +} + +static void test_typer_accesses(void) +{ + struct vm_gic v; + uint64_t addr; + uint32_t val; + int ret, i; + + v.vm = vm_create_default(0, 0, guest_code); + + v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, false); + + vm_vcpu_add_default(v.vm, 
3, guest_code); + + ret = access_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false); + TEST_ASSERT(ret && errno == EINVAL, "attempting to read GICR_TYPER of non created vcpu"); + + vm_vcpu_add_default(v.vm, 1, guest_code); + + ret = access_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false); + TEST_ASSERT(ret && errno == EBUSY, "read GICR_TYPER before GIC initialized"); + + vm_vcpu_add_default(v.vm, 2, guest_code); + + kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true); + + for (i = 0; i < NR_VCPUS ; i++) { + ret = access_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false); + TEST_ASSERT(!ret && !val, "read GICR_TYPER before rdist region setting"); + } + + addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0); + kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + + /* The 2 first rdists should be put there (vcpu 0 and 3) */ + ret = access_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false); + TEST_ASSERT(!ret && !val, "read typer of rdist #0"); + + ret = access_redist_reg(v.gic_fd, 3, GICR_TYPER, &val, false); + TEST_ASSERT(!ret && val == 0x310, "read typer of rdist #1"); + + addr = REDIST_REGION_ATTR_ADDR(10, 0x100000, 0, 1); + ret = _kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + TEST_ASSERT(ret && errno == EINVAL, "collision with previous rdist region"); + + ret = access_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false); + TEST_ASSERT(!ret && val == 0x100, + "no redist region attached to vcpu #1 yet, last cannot be returned"); + + ret = access_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false); + TEST_ASSERT(!ret && val == 0x200, + "no redist region attached to vcpu #2, last cannot be returned"); + + addr = REDIST_REGION_ATTR_ADDR(10, 0x20000, 0, 1); + kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + + ret = access_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false); + TEST_ASSERT(!ret && val == 0x100, "read typer of rdist #1"); + + ret = access_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false); + TEST_ASSERT(!ret && val == 0x210, + "read typer of rdist #1, last properly returned"); + + vm_gic_destroy(&v); +} + +/** + * Test GICR_TYPER last bit with new redist regions + * rdist regions #1 and #2 are contiguous + * rdist region #0 @0x100000 2 rdist capacity + * rdists: 0, 3 (Last) + * rdist region #1 @0x240000 2 rdist capacity + * rdists: 5, 4 (Last) + * rdist region #2 @0x200000 2 rdist capacity + * rdists: 1, 2 + */ +static void test_last_bit_redist_regions(void) +{ + uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 }; + struct vm_gic v; + uint64_t addr; + uint32_t val; + int ret; + + v.vm = vm_create_default_with_vcpus(6, 0, 0, guest_code, vcpuids); + + v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, false); + + kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true); + + addr = REDIST_REGION_ATTR_ADDR(2, 0x100000, 0, 0); + kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + + addr = REDIST_REGION_ATTR_ADDR(2, 0x240000, 0, 1); + kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + + addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 2); + kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + + ret = access_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false); + TEST_ASSERT(!ret 
&& val == 0x000, "read typer of rdist #0"); + + ret = access_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false); + TEST_ASSERT(!ret && val == 0x100, "read typer of rdist #1"); + + ret = access_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false); + TEST_ASSERT(!ret && val == 0x200, "read typer of rdist #2"); + + ret = access_redist_reg(v.gic_fd, 3, GICR_TYPER, &val, false); + TEST_ASSERT(!ret && val == 0x310, "read typer of rdist #3"); + + ret = access_redist_reg(v.gic_fd, 5, GICR_TYPER, &val, false); + TEST_ASSERT(!ret && val == 0x500, "read typer of rdist #5"); + + ret = access_redist_reg(v.gic_fd, 4, GICR_TYPER, &val, false); + TEST_ASSERT(!ret && val == 0x410, "read typer of rdist #4"); + + vm_gic_destroy(&v); +} + +/* Test last bit with legacy region */ +static void test_last_bit_single_rdist(void) +{ + uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 }; + struct vm_gic v; + uint64_t addr; + uint32_t val; + int ret; + + v.vm = vm_create_default_with_vcpus(6, 0, 0, guest_code, vcpuids); + + v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, false); + + kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true); + + addr = 0x10000; + kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true); + + ret = access_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false); + TEST_ASSERT(!ret && val == 0x000, "read typer of rdist #0"); + + ret = access_redist_reg(v.gic_fd, 3, GICR_TYPER, &val, false); + TEST_ASSERT(!ret && val == 0x300, "read typer of rdist #1"); + + ret = access_redist_reg(v.gic_fd, 5, GICR_TYPER, &val, false); + TEST_ASSERT(!ret && val == 0x500, "read typer of rdist #2"); + + ret = access_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false); + TEST_ASSERT(!ret && val == 0x100, "read typer of rdist #3"); + + ret = access_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false); + TEST_ASSERT(!ret && val == 0x210, "read typer of rdist #3"); + + vm_gic_destroy(&v); +} + +void test_kvm_device(void) +{ + struct vm_gic v; + int ret, fd; + + v.vm = vm_create_default_with_vcpus(NR_VCPUS, 0, 0, guest_code, NULL); + + /* try to create a non existing KVM device */ + ret = _kvm_create_device(v.vm, 0, true, &fd); + TEST_ASSERT(ret && errno == ENODEV, "unsupported device"); + + /* trial mode with VGIC_V3 device */ + ret = _kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, true, &fd); + if (ret) { + print_skip("GICv3 not supported"); + exit(KSFT_SKIP); + } + v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, false); + + ret = _kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, false, &fd); + TEST_ASSERT(ret && errno == EEXIST, "create GICv3 device twice"); + + kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, true); + + if (!_kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V2, true, &fd)) { + ret = _kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V2, false, &fd); + TEST_ASSERT(ret && errno == EINVAL, "create GICv2 while v3 exists"); + } + + vm_gic_destroy(&v); +} + +int main(int ac, char **av) +{ + max_ipa_bits = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); + + test_kvm_device(); + test_vcpus_then_vgic(); + test_vgic_then_vcpus(); + test_new_redist_regions(); + test_typer_accesses(); + test_last_bit_redist_regions(); + test_last_bit_single_rdist(); + + return 0; +} diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index bb2752d78fe3..81edbd23d371 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -17,6 +17,7 @@ #include 
<linux/bitmap.h> #include <linux/bitops.h> #include <asm/barrier.h> +#include <linux/atomic.h> #include "kvm_util.h" #include "test_util.h" @@ -137,12 +138,20 @@ static uint64_t host_clear_count; static uint64_t host_track_next_count; /* Whether dirty ring reset is requested, or finished */ -static sem_t dirty_ring_vcpu_stop; -static sem_t dirty_ring_vcpu_cont; +static sem_t sem_vcpu_stop; +static sem_t sem_vcpu_cont; +/* + * This is only set by main thread, and only cleared by vcpu thread. It is + * used to request vcpu thread to stop at the next GUEST_SYNC, since GUEST_SYNC + * is the only place that we'll guarantee both "dirty bit" and "dirty data" + * will match. E.g., SIG_IPI won't guarantee that if the vcpu is interrupted + * after setting dirty bit but before the data is written. + */ +static atomic_t vcpu_sync_stop_requested; /* * This is updated by the vcpu thread to tell the host whether it's a * ring-full event. It should only be read until a sem_wait() of - * dirty_ring_vcpu_stop and before vcpu continues to run. + * sem_vcpu_stop and before vcpu continues to run. */ static bool dirty_ring_vcpu_ring_full; /* @@ -234,6 +243,17 @@ static void clear_log_collect_dirty_pages(struct kvm_vm *vm, int slot, kvm_vm_clear_dirty_log(vm, slot, bitmap, 0, num_pages); } +/* Should only be called after a GUEST_SYNC */ +static void vcpu_handle_sync_stop(void) +{ + if (atomic_read(&vcpu_sync_stop_requested)) { + /* It means main thread is sleeping waiting */ + atomic_set(&vcpu_sync_stop_requested, false); + sem_post(&sem_vcpu_stop); + sem_wait_until(&sem_vcpu_cont); + } +} + static void default_after_vcpu_run(struct kvm_vm *vm, int ret, int err) { struct kvm_run *run = vcpu_state(vm, VCPU_ID); @@ -244,6 +264,8 @@ static void default_after_vcpu_run(struct kvm_vm *vm, int ret, int err) TEST_ASSERT(get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC, "Invalid guest sync status: exit_reason=%s\n", exit_reason_str(run->exit_reason)); + + vcpu_handle_sync_stop(); } static bool dirty_ring_supported(void) @@ -301,13 +323,13 @@ static void dirty_ring_wait_vcpu(void) { /* This makes sure that hardware PML cache flushed */ vcpu_kick(); - sem_wait_until(&dirty_ring_vcpu_stop); + sem_wait_until(&sem_vcpu_stop); } static void dirty_ring_continue_vcpu(void) { pr_info("Notifying vcpu to continue\n"); - sem_post(&dirty_ring_vcpu_cont); + sem_post(&sem_vcpu_cont); } static void dirty_ring_collect_dirty_pages(struct kvm_vm *vm, int slot, @@ -361,11 +383,11 @@ static void dirty_ring_after_vcpu_run(struct kvm_vm *vm, int ret, int err) /* Update the flag first before pause */ WRITE_ONCE(dirty_ring_vcpu_ring_full, run->exit_reason == KVM_EXIT_DIRTY_RING_FULL); - sem_post(&dirty_ring_vcpu_stop); + sem_post(&sem_vcpu_stop); pr_info("vcpu stops because %s...\n", dirty_ring_vcpu_ring_full ? 
"dirty ring is full" : "vcpu is kicked out"); - sem_wait_until(&dirty_ring_vcpu_cont); + sem_wait_until(&sem_vcpu_cont); pr_info("vcpu continues now.\n"); } else { TEST_ASSERT(false, "Invalid guest sync status: " @@ -377,7 +399,7 @@ static void dirty_ring_after_vcpu_run(struct kvm_vm *vm, int ret, int err) static void dirty_ring_before_vcpu_join(void) { /* Kick another round of vcpu just to make sure it will quit */ - sem_post(&dirty_ring_vcpu_cont); + sem_post(&sem_vcpu_cont); } struct log_mode { @@ -505,9 +527,8 @@ static void *vcpu_worker(void *data) */ sigmask->len = 8; pthread_sigmask(0, NULL, sigset); + sigdelset(sigset, SIG_IPI); vcpu_ioctl(vm, VCPU_ID, KVM_SET_SIGNAL_MASK, sigmask); - sigaddset(sigset, SIG_IPI); - pthread_sigmask(SIG_BLOCK, sigset, NULL); sigemptyset(sigset); sigaddset(sigset, SIG_IPI); @@ -768,7 +789,25 @@ static void run_test(enum vm_guest_mode mode, void *arg) usleep(p->interval * 1000); log_mode_collect_dirty_pages(vm, TEST_MEM_SLOT_INDEX, bmap, host_num_pages); + + /* + * See vcpu_sync_stop_requested definition for details on why + * we need to stop vcpu when verify data. + */ + atomic_set(&vcpu_sync_stop_requested, true); + sem_wait_until(&sem_vcpu_stop); + /* + * NOTE: for dirty ring, it's possible that we didn't stop at + * GUEST_SYNC but instead we stopped because ring is full; + * that's okay too because ring full means we're only missing + * the flush of the last page, and since we handle the last + * page specially verification will succeed anyway. + */ + assert(host_log_mode == LOG_MODE_DIRTY_RING || + atomic_read(&vcpu_sync_stop_requested) == false); vm_dirty_log_verify(mode, bmap); + sem_post(&sem_vcpu_cont); + iteration++; sync_global_to_guest(vm, iteration); } @@ -818,9 +857,10 @@ int main(int argc, char *argv[]) .interval = TEST_HOST_LOOP_INTERVAL, }; int opt, i; + sigset_t sigset; - sem_init(&dirty_ring_vcpu_stop, 0, 0); - sem_init(&dirty_ring_vcpu_cont, 0, 0); + sem_init(&sem_vcpu_stop, 0, 0); + sem_init(&sem_vcpu_cont, 0, 0); guest_modes_append_default(); @@ -876,6 +916,11 @@ int main(int argc, char *argv[]) srandom(time(0)); + /* Ensure that vCPU threads start with SIG_IPI blocked. 
*/ + sigemptyset(&sigset); + sigaddset(&sigset, SIG_IPI); + pthread_sigmask(SIG_BLOCK, &sigset, NULL); + if (host_log_mode_option == LOG_MODE_ALL) { /* Run each log mode */ for (i = 0; i < LOG_MODE_NUM; i++) { diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 0f4258eaa629..a8f022794ce3 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -69,9 +69,6 @@ enum vm_guest_mode { #define MIN_PAGE_SIZE (1U << MIN_PAGE_SHIFT) #define PTES_PER_MIN_PAGE ptes_per_page(MIN_PAGE_SIZE) -#define vm_guest_mode_string(m) vm_guest_mode_string[m] -extern const char * const vm_guest_mode_string[]; - struct vm_guest_mode_params { unsigned int pa_bits; unsigned int va_bits; @@ -85,6 +82,7 @@ int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap); int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id, struct kvm_enable_cap *cap); void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size); +const char *vm_guest_mode_string(uint32_t i); struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); void kvm_vm_free(struct kvm_vm *vmp); @@ -225,6 +223,15 @@ int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid, #endif void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid); +int _kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr); +int kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr); +int _kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test, int *fd); +int kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test); +int _kvm_device_access(int dev_fd, uint32_t group, uint64_t attr, + void *val, bool write); +int kvm_device_access(int dev_fd, uint32_t group, uint64_t attr, + void *val, bool write); + const char *exit_reason_str(unsigned int exit_reason); void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot); diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index b7f41399f22c..fade3130eb01 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -71,13 +71,32 @@ enum vm_mem_backing_src_type { VM_MEM_SRC_ANONYMOUS, VM_MEM_SRC_ANONYMOUS_THP, VM_MEM_SRC_ANONYMOUS_HUGETLB, + VM_MEM_SRC_ANONYMOUS_HUGETLB_16KB, + VM_MEM_SRC_ANONYMOUS_HUGETLB_64KB, + VM_MEM_SRC_ANONYMOUS_HUGETLB_512KB, + VM_MEM_SRC_ANONYMOUS_HUGETLB_1MB, + VM_MEM_SRC_ANONYMOUS_HUGETLB_2MB, + VM_MEM_SRC_ANONYMOUS_HUGETLB_8MB, + VM_MEM_SRC_ANONYMOUS_HUGETLB_16MB, + VM_MEM_SRC_ANONYMOUS_HUGETLB_32MB, + VM_MEM_SRC_ANONYMOUS_HUGETLB_256MB, + VM_MEM_SRC_ANONYMOUS_HUGETLB_512MB, + VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB, + VM_MEM_SRC_ANONYMOUS_HUGETLB_2GB, + VM_MEM_SRC_ANONYMOUS_HUGETLB_16GB, + NUM_SRC_TYPES, }; struct vm_mem_backing_src_alias { const char *name; - enum vm_mem_backing_src_type type; + uint32_t flag; }; +bool thp_configured(void); +size_t get_trans_hugepagesz(void); +size_t get_def_hugetlb_pagesz(void); +const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i); +size_t get_backing_src_pagesz(uint32_t i); void backing_src_help(void); enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name); diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c new file mode 100644 index 000000000000..1c4753fff19e --- /dev/null +++ b/tools/testing/selftests/kvm/kvm_page_table_test.c @@ -0,0 +1,506 @@ +// SPDX-License-Identifier: 
GPL-2.0 +/* + * KVM page table test + * + * Copyright (C) 2021, Huawei, Inc. + * + * Make sure that THP has been enabled or enough HUGETLB pages with specific + * page size have been pre-allocated on your system, if you are planning to + * use hugepages to back the guest memory for testing. + */ + +#define _GNU_SOURCE /* for program_invocation_name */ + +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <pthread.h> +#include <semaphore.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "guest_modes.h" + +#define TEST_MEM_SLOT_INDEX 1 + +/* Default size(1GB) of the memory for testing */ +#define DEFAULT_TEST_MEM_SIZE (1 << 30) + +/* Default guest test virtual memory offset */ +#define DEFAULT_GUEST_TEST_MEM 0xc0000000 + +/* Different guest memory accessing stages */ +enum test_stage { + KVM_BEFORE_MAPPINGS, + KVM_CREATE_MAPPINGS, + KVM_UPDATE_MAPPINGS, + KVM_ADJUST_MAPPINGS, + NUM_TEST_STAGES, +}; + +static const char * const test_stage_string[] = { + "KVM_BEFORE_MAPPINGS", + "KVM_CREATE_MAPPINGS", + "KVM_UPDATE_MAPPINGS", + "KVM_ADJUST_MAPPINGS", +}; + +struct vcpu_args { + int vcpu_id; + bool vcpu_write; +}; + +struct test_args { + struct kvm_vm *vm; + uint64_t guest_test_virt_mem; + uint64_t host_page_size; + uint64_t host_num_pages; + uint64_t large_page_size; + uint64_t large_num_pages; + uint64_t host_pages_per_lpage; + enum vm_mem_backing_src_type src_type; + struct vcpu_args vcpu_args[KVM_MAX_VCPUS]; +}; + +/* + * Guest variables. Use addr_gva2hva() if these variables need + * to be changed in host. + */ +static enum test_stage guest_test_stage; + +/* Host variables */ +static uint32_t nr_vcpus = 1; +static struct test_args test_args; +static enum test_stage *current_stage; +static bool host_quit; + +/* Whether the test stage is updated, or completed */ +static sem_t test_stage_updated; +static sem_t test_stage_completed; + +/* + * Guest physical memory offset of the testing memory slot. + * This will be set to the topmost valid physical address minus + * the test memory size. + */ +static uint64_t guest_test_phys_mem; + +/* + * Guest virtual memory offset of the testing memory slot. + * Must not conflict with identity mapped test code. + */ +static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; + +static void guest_code(int vcpu_id) +{ + struct test_args *p = &test_args; + struct vcpu_args *vcpu_args = &p->vcpu_args[vcpu_id]; + enum test_stage *current_stage = &guest_test_stage; + uint64_t addr; + int i, j; + + /* Make sure vCPU args data structure is not corrupt */ + GUEST_ASSERT(vcpu_args->vcpu_id == vcpu_id); + + while (true) { + addr = p->guest_test_virt_mem; + + switch (READ_ONCE(*current_stage)) { + /* + * All vCPU threads will be started in this stage, + * where guest code of each vCPU will do nothing. + */ + case KVM_BEFORE_MAPPINGS: + break; + + /* + * Before dirty logging, vCPUs concurrently access the first + * 8 bytes of each page (host page/large page) within the same + * memory region with different accessing types (read/write). + * Then KVM will create normal page mappings or huge block + * mappings for them. + */ + case KVM_CREATE_MAPPINGS: + for (i = 0; i < p->large_num_pages; i++) { + if (vcpu_args->vcpu_write) + *(uint64_t *)addr = 0x0123456789ABCDEF; + else + READ_ONCE(*(uint64_t *)addr); + + addr += p->large_page_size; + } + break; + + /* + * During dirty logging, KVM will only update attributes of the + * normal page mappings from RO to RW if memory backing src type + * is anonymous. 
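(With dirty logging enabled the pages start out write-protected, so + * the first write to each page faults and lets KVM flip that mapping + * back to writable.)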
In other cases, KVM will split the huge block + * mappings into normal page mappings if memory backing src type + * is THP or HUGETLB. + */ + case KVM_UPDATE_MAPPINGS: + if (p->src_type == VM_MEM_SRC_ANONYMOUS) { + for (i = 0; i < p->host_num_pages; i++) { + *(uint64_t *)addr = 0x0123456789ABCDEF; + addr += p->host_page_size; + } + break; + } + + for (i = 0; i < p->large_num_pages; i++) { + /* + * Write to the first host page in each large + * page region, and trigger the splitting of + * large pages. + */ + *(uint64_t *)addr = 0x0123456789ABCDEF; + + /* + * Access the middle host pages in each large + * page region. Since dirty logging is enabled, + * this will create new mappings at the smallest + * granularity. + */ + addr += p->large_page_size / 2; + for (j = 0; j < p->host_pages_per_lpage / 2; j++) { + READ_ONCE(*(uint64_t *)addr); + addr += p->host_page_size; + } + } + break; + + /* + * After dirty logging is stopped, vCPUs concurrently read + * from every single host page. Then KVM will coalesce the + * split page mappings back to block mappings. And a TLB + * conflict abort could occur here if TLB entries of the + * page mappings are not fully invalidated. + */ + case KVM_ADJUST_MAPPINGS: + for (i = 0; i < p->host_num_pages; i++) { + READ_ONCE(*(uint64_t *)addr); + addr += p->host_page_size; + } + break; + + default: + GUEST_ASSERT(0); + } + + GUEST_SYNC(1); + } +} + +static void *vcpu_worker(void *data) +{ + int ret; + struct vcpu_args *vcpu_args = data; + struct kvm_vm *vm = test_args.vm; + int vcpu_id = vcpu_args->vcpu_id; + struct kvm_run *run; + struct timespec start; + struct timespec ts_diff; + enum test_stage stage; + + vcpu_args_set(vm, vcpu_id, 1, vcpu_id); + run = vcpu_state(vm, vcpu_id); + + while (!READ_ONCE(host_quit)) { + ret = sem_wait(&test_stage_updated); + TEST_ASSERT(ret == 0, "Error in sem_wait"); + + if (READ_ONCE(host_quit)) + return NULL; + + clock_gettime(CLOCK_MONOTONIC_RAW, &start); + ret = _vcpu_run(vm, vcpu_id); + ts_diff = timespec_elapsed(start); + + TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); + TEST_ASSERT(get_ucall(vm, vcpu_id, NULL) == UCALL_SYNC, + "Invalid guest sync status: exit_reason=%s\n", + exit_reason_str(run->exit_reason)); + + pr_debug("Got sync event from vCPU %d\n", vcpu_id); + stage = READ_ONCE(*current_stage); + + /* + * Here we can see the execution time of every + * single vcpu running in the different test stages. 
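(CLOCK_MONOTONIC_RAW is used here so that NTP adjustments cannot + * skew these per-stage timings.)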
+ */ + pr_debug("vCPU %d has completed stage %s\n" + "execution time is: %ld.%.9lds\n\n", + vcpu_id, test_stage_string[stage], + ts_diff.tv_sec, ts_diff.tv_nsec); + + ret = sem_post(&test_stage_completed); + TEST_ASSERT(ret == 0, "Error in sem_post"); + } + + return NULL; +} + +struct test_params { + uint64_t phys_offset; + uint64_t test_mem_size; + enum vm_mem_backing_src_type src_type; +}; + +static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg) +{ + int ret; + struct test_params *p = arg; + struct vcpu_args *vcpu_args; + enum vm_mem_backing_src_type src_type = p->src_type; + uint64_t large_page_size = get_backing_src_pagesz(src_type); + uint64_t guest_page_size = vm_guest_mode_params[mode].page_size; + uint64_t host_page_size = getpagesize(); + uint64_t test_mem_size = p->test_mem_size; + uint64_t guest_num_pages; + uint64_t alignment; + void *host_test_mem; + struct kvm_vm *vm; + int vcpu_id; + + /* Align up the test memory size */ + alignment = max(large_page_size, guest_page_size); + test_mem_size = (test_mem_size + alignment - 1) & ~(alignment - 1); + + /* Create a VM with enough guest pages */ + guest_num_pages = test_mem_size / guest_page_size; + vm = vm_create_with_vcpus(mode, nr_vcpus, + guest_num_pages, 0, guest_code, NULL); + + /* Align down GPA of the testing memslot */ + if (!p->phys_offset) + guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * + guest_page_size; + else + guest_test_phys_mem = p->phys_offset; +#ifdef __s390x__ + alignment = max(0x100000, alignment); +#endif + guest_test_phys_mem &= ~(alignment - 1); + + /* Set up the shared data structure test_args */ + test_args.vm = vm; + test_args.guest_test_virt_mem = guest_test_virt_mem; + test_args.host_page_size = host_page_size; + test_args.host_num_pages = test_mem_size / host_page_size; + test_args.large_page_size = large_page_size; + test_args.large_num_pages = test_mem_size / large_page_size; + test_args.host_pages_per_lpage = large_page_size / host_page_size; + test_args.src_type = src_type; + + for (vcpu_id = 0; vcpu_id < KVM_MAX_VCPUS; vcpu_id++) { + vcpu_args = &test_args.vcpu_args[vcpu_id]; + vcpu_args->vcpu_id = vcpu_id; + vcpu_args->vcpu_write = !(vcpu_id % 2); + } + + /* Add an extra memory slot with specified backing src type */ + vm_userspace_mem_region_add(vm, src_type, guest_test_phys_mem, + TEST_MEM_SLOT_INDEX, guest_num_pages, 0); + + /* Do mapping(GVA->GPA) for the testing memory slot */ + virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0); + + /* Cache the HVA pointer of the region */ + host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem); + + /* Export shared structure test_args to guest */ + ucall_init(vm, NULL); + sync_global_to_guest(vm, test_args); + + ret = sem_init(&test_stage_updated, 0, 0); + TEST_ASSERT(ret == 0, "Error in sem_init"); + + ret = sem_init(&test_stage_completed, 0, 0); + TEST_ASSERT(ret == 0, "Error in sem_init"); + + current_stage = addr_gva2hva(vm, (vm_vaddr_t)(&guest_test_stage)); + *current_stage = NUM_TEST_STAGES; + + pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); + pr_info("Testing memory backing src type: %s\n", + vm_mem_backing_src_alias(src_type)->name); + pr_info("Testing memory backing src granularity: 0x%lx\n", + large_page_size); + pr_info("Testing memory size(aligned): 0x%lx\n", test_mem_size); + pr_info("Guest physical test memory offset: 0x%lx\n", + guest_test_phys_mem); + pr_info("Host virtual test memory offset: 0x%lx\n", + (uint64_t)host_test_mem); + pr_info("Number 
of testing vCPUs: %d\n", nr_vcpus); + + return vm; +} + +static void vcpus_complete_new_stage(enum test_stage stage) +{ + int ret; + int vcpus; + + /* Wake up all the vcpus to run new test stage */ + for (vcpus = 0; vcpus < nr_vcpus; vcpus++) { + ret = sem_post(&test_stage_updated); + TEST_ASSERT(ret == 0, "Error in sem_post"); + } + pr_debug("All vcpus have been notified to continue\n"); + + /* Wait for all the vcpus to complete new test stage */ + for (vcpus = 0; vcpus < nr_vcpus; vcpus++) { + ret = sem_wait(&test_stage_completed); + TEST_ASSERT(ret == 0, "Error in sem_wait"); + + pr_debug("%d vcpus have completed stage %s\n", + vcpus + 1, test_stage_string[stage]); + } + + pr_debug("All vcpus have completed stage %s\n", + test_stage_string[stage]); +} + +static void run_test(enum vm_guest_mode mode, void *arg) +{ + int ret; + pthread_t *vcpu_threads; + struct kvm_vm *vm; + int vcpu_id; + struct timespec start; + struct timespec ts_diff; + + /* Create VM with vCPUs and make some pre-initialization */ + vm = pre_init_before_test(mode, arg); + + vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); + TEST_ASSERT(vcpu_threads, "Memory allocation failed"); + + host_quit = false; + *current_stage = KVM_BEFORE_MAPPINGS; + + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { + pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker, + &test_args.vcpu_args[vcpu_id]); + } + + vcpus_complete_new_stage(*current_stage); + pr_info("Started all vCPUs successfully\n"); + + /* Test the stage of KVM creating mappings */ + *current_stage = KVM_CREATE_MAPPINGS; + + clock_gettime(CLOCK_MONOTONIC_RAW, &start); + vcpus_complete_new_stage(*current_stage); + ts_diff = timespec_elapsed(start); + + pr_info("KVM_CREATE_MAPPINGS: total execution time: %ld.%.9lds\n\n", + ts_diff.tv_sec, ts_diff.tv_nsec); + + /* Test the stage of KVM updating mappings */ + vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, + KVM_MEM_LOG_DIRTY_PAGES); + + *current_stage = KVM_UPDATE_MAPPINGS; + + clock_gettime(CLOCK_MONOTONIC_RAW, &start); + vcpus_complete_new_stage(*current_stage); + ts_diff = timespec_elapsed(start); + + pr_info("KVM_UPDATE_MAPPINGS: total execution time: %ld.%.9lds\n\n", + ts_diff.tv_sec, ts_diff.tv_nsec); + + /* Test the stage of KVM adjusting mappings */ + vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, 0); + + *current_stage = KVM_ADJUST_MAPPINGS; + + clock_gettime(CLOCK_MONOTONIC_RAW, &start); + vcpus_complete_new_stage(*current_stage); + ts_diff = timespec_elapsed(start); + + pr_info("KVM_ADJUST_MAPPINGS: total execution time: %ld.%.9lds\n\n", + ts_diff.tv_sec, ts_diff.tv_nsec); + + /* Tell the vcpu thread to quit */ + host_quit = true; + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { + ret = sem_post(&test_stage_updated); + TEST_ASSERT(ret == 0, "Error in sem_post"); + } + + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) + pthread_join(vcpu_threads[vcpu_id], NULL); + + ret = sem_destroy(&test_stage_updated); + TEST_ASSERT(ret == 0, "Error in sem_destroy"); + + ret = sem_destroy(&test_stage_completed); + TEST_ASSERT(ret == 0, "Error in sem_destroy"); + + free(vcpu_threads); + ucall_uninit(vm); + kvm_vm_free(vm); +} + +static void help(char *name) +{ + puts(""); + printf("usage: %s [-h] [-p offset] [-m mode] " + "[-b mem-size] [-v vcpus] [-s mem-type]\n", name); + puts(""); + printf(" -p: specify guest physical test memory offset\n" + " Warning: a low offset can conflict with the loaded test code.\n"); + guest_modes_help(); + printf(" -b: specify size of the memory region for testing. e.g. 
10M or 3G.\n" + " (default: 1G)\n"); + printf(" -v: specify the number of vCPUs to run\n" + " (default: 1)\n"); + printf(" -s: specify the type of memory that should be used to\n" + " back the guest data region.\n" + " (default: anonymous)\n\n"); + backing_src_help(); + puts(""); +} + +int main(int argc, char *argv[]) +{ + int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); + struct test_params p = { + .test_mem_size = DEFAULT_TEST_MEM_SIZE, + .src_type = VM_MEM_SRC_ANONYMOUS, + }; + int opt; + + guest_modes_append_default(); + + while ((opt = getopt(argc, argv, "hp:m:b:v:s:")) != -1) { + switch (opt) { + case 'p': + p.phys_offset = strtoull(optarg, NULL, 0); + break; + case 'm': + guest_modes_cmdline(optarg); + break; + case 'b': + p.test_mem_size = parse_size(optarg); + break; + case 'v': + nr_vcpus = atoi(optarg); + TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, + "Invalid number of vcpus, must be between 1 and %d", max_vcpus); + break; + case 's': + p.src_type = parse_backing_src_type(optarg); + break; + case 'h': + default: + help(argv[0]); + exit(0); + } + } + + for_each_guest_mode(run_test, &p); + + return 0; +} diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c index 5ebbd0d6b472..71ade6100fd3 100644 --- a/tools/testing/selftests/kvm/lib/assert.c +++ b/tools/testing/selftests/kvm/lib/assert.c @@ -71,9 +71,9 @@ test_assert(bool exp, const char *exp_str, fprintf(stderr, "==== Test Assertion Failure ====\n" " %s:%u: %s\n" - " pid=%d tid=%d - %s\n", + " pid=%d tid=%d errno=%d - %s\n", file, line, exp_str, getpid(), _gettid(), - strerror(errno)); + errno, strerror(errno)); test_dump_stack(); if (fmt) { fputs(" ", stderr); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index b8849a1aca79..fc83f6c5902d 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -18,7 +18,6 @@ #include <unistd.h> #include <linux/kernel.h> -#define KVM_UTIL_PGS_PER_HUGEPG 512 #define KVM_UTIL_MIN_PFN 2 static int vcpu_mmap_sz(void); @@ -143,17 +142,24 @@ static void vm_open(struct kvm_vm *vm, int perm) "rc: %i errno: %i", vm->fd, errno); } -const char * const vm_guest_mode_string[] = { - "PA-bits:52, VA-bits:48, 4K pages", - "PA-bits:52, VA-bits:48, 64K pages", - "PA-bits:48, VA-bits:48, 4K pages", - "PA-bits:48, VA-bits:48, 64K pages", - "PA-bits:40, VA-bits:48, 4K pages", - "PA-bits:40, VA-bits:48, 64K pages", - "PA-bits:ANY, VA-bits:48, 4K pages", -}; -_Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES, - "Missing new mode strings?"); +const char *vm_guest_mode_string(uint32_t i) +{ + static const char * const strings[] = { + [VM_MODE_P52V48_4K] = "PA-bits:52, VA-bits:48, 4K pages", + [VM_MODE_P52V48_64K] = "PA-bits:52, VA-bits:48, 64K pages", + [VM_MODE_P48V48_4K] = "PA-bits:48, VA-bits:48, 4K pages", + [VM_MODE_P48V48_64K] = "PA-bits:48, VA-bits:48, 64K pages", + [VM_MODE_P40V48_4K] = "PA-bits:40, VA-bits:48, 4K pages", + [VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages", + [VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages", + }; + _Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES, + "Missing new mode strings?"); + + TEST_ASSERT(i < NUM_VM_MODES, "Guest mode ID %d too big", i); + + return strings[i]; +} const struct vm_guest_mode_params vm_guest_mode_params[] = { { 52, 48, 0x1000, 12 }, @@ -514,7 +520,7 @@ static void vm_vcpu_rm(struct kvm_vm *vm, struct vcpu *vcpu) ret = munmap(vcpu->state, 
vcpu_mmap_sz()); TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i " "errno: %i", ret, errno); - close(vcpu->fd); + ret = close(vcpu->fd); TEST_ASSERT(ret == 0, "Close of VCPU fd failed, rc: %i " "errno: %i", ret, errno); @@ -534,7 +540,7 @@ void kvm_vm_release(struct kvm_vm *vmp) TEST_ASSERT(ret == 0, "Close of vm fd failed,\n" " vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno); - close(vmp->kvm_fd); + ret = close(vmp->kvm_fd); TEST_ASSERT(ret == 0, "Close of /dev/kvm fd failed,\n" " vmp->kvm_fd: %i rc: %i errno: %i", vmp->kvm_fd, ret, errno); } @@ -681,7 +687,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, { int ret; struct userspace_mem_region *region; - size_t huge_page_size = KVM_UTIL_PGS_PER_HUGEPG * vm->page_size; + size_t backing_src_pagesz = get_backing_src_pagesz(src_type); size_t alignment; TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages, @@ -743,7 +749,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, #endif if (src_type == VM_MEM_SRC_ANONYMOUS_THP) - alignment = max(huge_page_size, alignment); + alignment = max(backing_src_pagesz, alignment); /* Add enough memory to align up if necessary */ if (alignment > 1) @@ -752,7 +758,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, region->mmap_start = mmap(NULL, region->mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS - | (src_type == VM_MEM_SRC_ANONYMOUS_HUGETLB ? MAP_HUGETLB : 0), + | vm_mem_backing_src_alias(src_type)->flag, -1, 0); TEST_ASSERT(region->mmap_start != MAP_FAILED, "test_malloc failed, mmap_start: %p errno: %i", @@ -762,22 +768,13 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, region->host_mem = align(region->mmap_start, alignment); /* As needed perform madvise */ - if (src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) { - struct stat statbuf; - - ret = stat("/sys/kernel/mm/transparent_hugepage", &statbuf); - TEST_ASSERT(ret == 0 || (ret == -1 && errno == ENOENT), - "stat /sys/kernel/mm/transparent_hugepage"); - - TEST_ASSERT(ret == 0 || src_type != VM_MEM_SRC_ANONYMOUS_THP, - "VM_MEM_SRC_ANONYMOUS_THP requires THP to be configured in the host kernel"); - - if (ret == 0) { - ret = madvise(region->host_mem, npages * vm->page_size, - src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE); - TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %x", - region->host_mem, npages * vm->page_size, src_type); - } + if ((src_type == VM_MEM_SRC_ANONYMOUS || + src_type == VM_MEM_SRC_ANONYMOUS_THP) && thp_configured()) { + ret = madvise(region->host_mem, npages * vm->page_size, + src_type == VM_MEM_SRC_ANONYMOUS ? 
MADV_NOHUGEPAGE : MADV_HUGEPAGE); + TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %s", + region->host_mem, npages * vm->page_size, + vm_mem_backing_src_alias(src_type)->name); } region->unused_phy_pages = sparsebit_alloc(); @@ -1734,6 +1731,81 @@ int _kvm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg) } /* + * Device Ioctl + */ + +int _kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr) +{ + struct kvm_device_attr attribute = { + .group = group, + .attr = attr, + .flags = 0, + }; + + return ioctl(dev_fd, KVM_HAS_DEVICE_ATTR, &attribute); +} + +int kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr) +{ + int ret = _kvm_device_check_attr(dev_fd, group, attr); + + TEST_ASSERT(ret >= 0, "KVM_HAS_DEVICE_ATTR failed, rc: %i errno: %i", ret, errno); + return ret; +} + +int _kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test, int *fd) +{ + struct kvm_create_device create_dev; + int ret; + + create_dev.type = type; + create_dev.fd = -1; + create_dev.flags = test ? KVM_CREATE_DEVICE_TEST : 0; + ret = ioctl(vm_get_fd(vm), KVM_CREATE_DEVICE, &create_dev); + *fd = create_dev.fd; + return ret; +} + +int kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test) +{ + int fd, ret; + + ret = _kvm_create_device(vm, type, test, &fd); + + if (!test) { + TEST_ASSERT(ret >= 0, + "KVM_CREATE_DEVICE IOCTL failed, rc: %i errno: %i", ret, errno); + return fd; + } + return ret; +} + +int _kvm_device_access(int dev_fd, uint32_t group, uint64_t attr, + void *val, bool write) +{ + struct kvm_device_attr kvmattr = { + .group = group, + .attr = attr, + .flags = 0, + .addr = (uintptr_t)val, + }; + int ret; + + ret = ioctl(dev_fd, write ? KVM_SET_DEVICE_ATTR : KVM_GET_DEVICE_ATTR, + &kvmattr); + return ret; +} + +int kvm_device_access(int dev_fd, uint32_t group, uint64_t attr, + void *val, bool write) +{ + int ret = _kvm_device_access(dev_fd, group, attr, val, write); + + TEST_ASSERT(ret >= 0, "KVM_SET|GET_DEVICE_ATTR IOCTL failed, rc: %i errno: %i", ret, errno); + return ret; +} + +/* * VM Dump * * Input Args: diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 906c955384e2..63d2bc7d757b 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -10,6 +10,8 @@ #include <limits.h> #include <stdlib.h> #include <time.h> +#include <sys/stat.h> +#include <linux/mman.h> #include "linux/kernel.h" #include "test_util.h" @@ -111,28 +113,169 @@ void print_skip(const char *fmt, ...) 
puts(", skipping test"); } -const struct vm_mem_backing_src_alias backing_src_aliases[] = { - {"anonymous", VM_MEM_SRC_ANONYMOUS,}, - {"anonymous_thp", VM_MEM_SRC_ANONYMOUS_THP,}, - {"anonymous_hugetlb", VM_MEM_SRC_ANONYMOUS_HUGETLB,}, -}; +bool thp_configured(void) +{ + int ret; + struct stat statbuf; + + ret = stat("/sys/kernel/mm/transparent_hugepage", &statbuf); + TEST_ASSERT(ret == 0 || (ret == -1 && errno == ENOENT), + "Error in stating /sys/kernel/mm/transparent_hugepage"); + + return ret == 0; +} + +size_t get_trans_hugepagesz(void) +{ + size_t size; + FILE *f; + + TEST_ASSERT(thp_configured(), "THP is not configured in host kernel"); + + f = fopen("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", "r"); + TEST_ASSERT(f != NULL, "Error in opening transparent_hugepage/hpage_pmd_size"); + + fscanf(f, "%ld", &size); + fclose(f); + + return size; +} + +size_t get_def_hugetlb_pagesz(void) +{ + char buf[64]; + const char *tag = "Hugepagesize:"; + FILE *f; + + f = fopen("/proc/meminfo", "r"); + TEST_ASSERT(f != NULL, "Error in opening /proc/meminfo"); + + while (fgets(buf, sizeof(buf), f) != NULL) { + if (strstr(buf, tag) == buf) { + fclose(f); + return strtoull(buf + strlen(tag), NULL, 10) << 10; + } + } + + if (feof(f)) + TEST_FAIL("HUGETLB is not configured in host kernel"); + else + TEST_FAIL("Error in reading /proc/meminfo"); + + fclose(f); + return 0; +} + +const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i) +{ + static const struct vm_mem_backing_src_alias aliases[] = { + [VM_MEM_SRC_ANONYMOUS] = { + .name = "anonymous", + .flag = 0, + }, + [VM_MEM_SRC_ANONYMOUS_THP] = { + .name = "anonymous_thp", + .flag = 0, + }, + [VM_MEM_SRC_ANONYMOUS_HUGETLB] = { + .name = "anonymous_hugetlb", + .flag = MAP_HUGETLB, + }, + [VM_MEM_SRC_ANONYMOUS_HUGETLB_16KB] = { + .name = "anonymous_hugetlb_16kb", + .flag = MAP_HUGETLB | MAP_HUGE_16KB, + }, + [VM_MEM_SRC_ANONYMOUS_HUGETLB_64KB] = { + .name = "anonymous_hugetlb_64kb", + .flag = MAP_HUGETLB | MAP_HUGE_64KB, + }, + [VM_MEM_SRC_ANONYMOUS_HUGETLB_512KB] = { + .name = "anonymous_hugetlb_512kb", + .flag = MAP_HUGETLB | MAP_HUGE_512KB, + }, + [VM_MEM_SRC_ANONYMOUS_HUGETLB_1MB] = { + .name = "anonymous_hugetlb_1mb", + .flag = MAP_HUGETLB | MAP_HUGE_1MB, + }, + [VM_MEM_SRC_ANONYMOUS_HUGETLB_2MB] = { + .name = "anonymous_hugetlb_2mb", + .flag = MAP_HUGETLB | MAP_HUGE_2MB, + }, + [VM_MEM_SRC_ANONYMOUS_HUGETLB_8MB] = { + .name = "anonymous_hugetlb_8mb", + .flag = MAP_HUGETLB | MAP_HUGE_8MB, + }, + [VM_MEM_SRC_ANONYMOUS_HUGETLB_16MB] = { + .name = "anonymous_hugetlb_16mb", + .flag = MAP_HUGETLB | MAP_HUGE_16MB, + }, + [VM_MEM_SRC_ANONYMOUS_HUGETLB_32MB] = { + .name = "anonymous_hugetlb_32mb", + .flag = MAP_HUGETLB | MAP_HUGE_32MB, + }, + [VM_MEM_SRC_ANONYMOUS_HUGETLB_256MB] = { + .name = "anonymous_hugetlb_256mb", + .flag = MAP_HUGETLB | MAP_HUGE_256MB, + }, + [VM_MEM_SRC_ANONYMOUS_HUGETLB_512MB] = { + .name = "anonymous_hugetlb_512mb", + .flag = MAP_HUGETLB | MAP_HUGE_512MB, + }, + [VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB] = { + .name = "anonymous_hugetlb_1gb", + .flag = MAP_HUGETLB | MAP_HUGE_1GB, + }, + [VM_MEM_SRC_ANONYMOUS_HUGETLB_2GB] = { + .name = "anonymous_hugetlb_2gb", + .flag = MAP_HUGETLB | MAP_HUGE_2GB, + }, + [VM_MEM_SRC_ANONYMOUS_HUGETLB_16GB] = { + .name = "anonymous_hugetlb_16gb", + .flag = MAP_HUGETLB | MAP_HUGE_16GB, + }, + }; + _Static_assert(ARRAY_SIZE(aliases) == NUM_SRC_TYPES, + "Missing new backing src types?"); + + TEST_ASSERT(i < NUM_SRC_TYPES, "Backing src type ID %d too big", i); + + return &aliases[i]; +} + 
+#define MAP_HUGE_PAGE_SIZE(x) (1ULL << ((x >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK)) + +size_t get_backing_src_pagesz(uint32_t i) +{ + uint32_t flag = vm_mem_backing_src_alias(i)->flag; + + switch (i) { + case VM_MEM_SRC_ANONYMOUS: + return getpagesize(); + case VM_MEM_SRC_ANONYMOUS_THP: + return get_trans_hugepagesz(); + case VM_MEM_SRC_ANONYMOUS_HUGETLB: + return get_def_hugetlb_pagesz(); + default: + return MAP_HUGE_PAGE_SIZE(flag); + } +} void backing_src_help(void) { int i; printf("Available backing src types:\n"); - for (i = 0; i < ARRAY_SIZE(backing_src_aliases); i++) - printf("\t%s\n", backing_src_aliases[i].name); + for (i = 0; i < NUM_SRC_TYPES; i++) + printf("\t%s\n", vm_mem_backing_src_alias(i)->name); } enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name) { int i; - for (i = 0; i < ARRAY_SIZE(backing_src_aliases); i++) - if (!strcmp(type_name, backing_src_aliases[i].name)) - return backing_src_aliases[i].type; + for (i = 0; i < NUM_SRC_TYPES; i++) + if (!strcmp(type_name, vm_mem_backing_src_alias(i)->name)) + return i; backing_src_help(); TEST_FAIL("Unknown backing src type: %s", type_name); diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index f127ed31dba7..978f5b5f4dc0 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -329,6 +329,22 @@ static void test_zero_memory_regions(void) } #endif /* __x86_64__ */ +static int test_memory_region_add(struct kvm_vm *vm, void *mem, uint32_t slot, + uint32_t size, uint64_t guest_addr) +{ + struct kvm_userspace_memory_region region; + int ret; + + region.slot = slot; + region.flags = 0; + region.guest_phys_addr = guest_addr; + region.memory_size = size; + region.userspace_addr = (uintptr_t) mem; + ret = ioctl(vm_get_fd(vm), KVM_SET_USER_MEMORY_REGION, &region); + + return ret; +} + /* * Test that memory slots can be added up to KVM_CAP_NR_MEMSLOTS, and that * any attempt to add further slots should fail. */
@@ -339,9 +355,15 @@ static void test_add_max_memory_regions(void) struct kvm_vm *vm; uint32_t max_mem_slots; uint32_t slot; - uint64_t guest_addr = 0x0; - uint64_t mem_reg_npages; - void *mem; + void *mem, *mem_aligned, *mem_extra; + size_t alignment; + +#ifdef __s390x__ + /* On s390x, the host address must be aligned to 1M (due to PGSTEs) */ + alignment = 0x100000; +#else + alignment = 1; +#endif max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS); TEST_ASSERT(max_mem_slots > 0, @@ -350,30 +372,37 @@ static void test_add_max_memory_regions(void) vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR); - mem_reg_npages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, MEM_REGION_SIZE); - /* Check it can be added memory slots up to the maximum allowed */ pr_info("Adding slots 0..%i, each memory region with %dK size\n", (max_mem_slots - 1), MEM_REGION_SIZE >> 10); + + mem = mmap(NULL, MEM_REGION_SIZE * max_mem_slots + alignment, + PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host"); + mem_aligned = (void *)(((size_t) mem + alignment - 1) & ~(alignment - 1)); + for (slot = 0; slot < max_mem_slots; slot++) { - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - guest_addr, slot, mem_reg_npages, - 0); - guest_addr += MEM_REGION_SIZE; + ret = test_memory_region_add(vm, mem_aligned + + ((uint64_t)slot * MEM_REGION_SIZE), + slot, MEM_REGION_SIZE, + (uint64_t)slot * MEM_REGION_SIZE); + TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n" + " rc: %i errno: %i slot: %i\n", + ret, errno, slot); } /* Check it cannot be added memory slots beyond the limit */ - mem = mmap(NULL, MEM_REGION_SIZE, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host"); + mem_extra = mmap(NULL, MEM_REGION_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + TEST_ASSERT(mem_extra != MAP_FAILED, "Failed to mmap() host"); - ret = ioctl(vm_get_fd(vm), KVM_SET_USER_MEMORY_REGION, - &(struct kvm_userspace_memory_region) {slot, 0, guest_addr, - MEM_REGION_SIZE, (uint64_t) mem}); + ret = test_memory_region_add(vm, mem_extra, max_mem_slots, MEM_REGION_SIZE, + (uint64_t)max_mem_slots * MEM_REGION_SIZE); TEST_ASSERT(ret == -1 && errno == EINVAL, "Adding one more memory slot should fail with EINVAL"); - munmap(mem, MEM_REGION_SIZE); + munmap(mem, MEM_REGION_SIZE * max_mem_slots + alignment); + munmap(mem_extra, MEM_REGION_SIZE); kvm_vm_free(vm); } diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c index 804ff5ff022d..1f4a0599683c 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c @@ -186,7 +186,7 @@ int main(int argc, char *argv[]) vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &st); } - struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR);; + struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR); rs->state = 0x5a; for (;;) { diff --git a/tools/testing/selftests/landlock/.gitignore b/tools/testing/selftests/landlock/.gitignore new file mode 100644 index 000000000000..470203a7cd73 --- /dev/null +++ b/tools/testing/selftests/landlock/.gitignore @@ -0,0 +1,2 @@ +/*_test +/true diff --git a/tools/testing/selftests/landlock/Makefile b/tools/testing/selftests/landlock/Makefile new file mode 100644 index 000000000000..a99596ca9882 --- /dev/null +++ b/tools/testing/selftests/landlock/Makefile @@ -0,0 +1,24 @@ +# SPDX-License-Identifier: 
GPL-2.0 + +CFLAGS += -Wall -O2 + +src_test := $(wildcard *_test.c) + +TEST_GEN_PROGS := $(src_test:.c=) + +TEST_GEN_PROGS_EXTENDED := true + +KSFT_KHDR_INSTALL := 1 +OVERRIDE_TARGETS := 1 +include ../lib.mk + +khdr_dir = $(top_srcdir)/usr/include + +$(khdr_dir)/linux/landlock.h: khdr + @: + +$(OUTPUT)/true: true.c + $(LINK.c) $< $(LDLIBS) -o $@ -static + +$(OUTPUT)/%_test: %_test.c $(khdr_dir)/linux/landlock.h ../kselftest_harness.h common.h + $(LINK.c) $< $(LDLIBS) -o $@ -lcap -I$(khdr_dir) diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c new file mode 100644 index 000000000000..ca40abe9daa8 --- /dev/null +++ b/tools/testing/selftests/landlock/base_test.c @@ -0,0 +1,266 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Landlock tests - Common user space base + * + * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net> + * Copyright © 2019-2020 ANSSI + */ + +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <linux/landlock.h> +#include <string.h> +#include <sys/prctl.h> +#include <sys/socket.h> +#include <sys/types.h> + +#include "common.h" + +#ifndef O_PATH +#define O_PATH 010000000 +#endif + +TEST(inconsistent_attr) { + const long page_size = sysconf(_SC_PAGESIZE); + char *const buf = malloc(page_size + 1); + struct landlock_ruleset_attr *const ruleset_attr = (void *)buf; + + ASSERT_NE(NULL, buf); + + /* Checks copy_from_user(). */ + ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, 0, 0)); + /* The size is less than sizeof(struct landlock_attr_enforce). */ + ASSERT_EQ(EINVAL, errno); + ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, 1, 0)); + ASSERT_EQ(EINVAL, errno); + + ASSERT_EQ(-1, landlock_create_ruleset(NULL, 1, 0)); + /* The size is less than sizeof(struct landlock_attr_enforce). */ + ASSERT_EQ(EFAULT, errno); + + ASSERT_EQ(-1, landlock_create_ruleset(NULL, + sizeof(struct landlock_ruleset_attr), 0)); + ASSERT_EQ(EFAULT, errno); + + ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size + 1, 0)); + ASSERT_EQ(E2BIG, errno); + + ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, + sizeof(struct landlock_ruleset_attr), 0)); + ASSERT_EQ(ENOMSG, errno); + ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size, 0)); + ASSERT_EQ(ENOMSG, errno); + + /* Checks non-zero value. 
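A non-zero byte anywhere past the fields known to the kernel must + * make the call fail with E2BIG.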
*/ + buf[page_size - 2] = '.'; + ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size, 0)); + ASSERT_EQ(E2BIG, errno); + + ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size + 1, 0)); + ASSERT_EQ(E2BIG, errno); + + free(buf); +} + +TEST(abi_version) { + const struct landlock_ruleset_attr ruleset_attr = { + .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE, + }; + ASSERT_EQ(1, landlock_create_ruleset(NULL, 0, + LANDLOCK_CREATE_RULESET_VERSION)); + + ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0, + LANDLOCK_CREATE_RULESET_VERSION)); + ASSERT_EQ(EINVAL, errno); + + ASSERT_EQ(-1, landlock_create_ruleset(NULL, sizeof(ruleset_attr), + LANDLOCK_CREATE_RULESET_VERSION)); + ASSERT_EQ(EINVAL, errno); + + ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, + sizeof(ruleset_attr), + LANDLOCK_CREATE_RULESET_VERSION)); + ASSERT_EQ(EINVAL, errno); + + ASSERT_EQ(-1, landlock_create_ruleset(NULL, 0, + LANDLOCK_CREATE_RULESET_VERSION | 1 << 31)); + ASSERT_EQ(EINVAL, errno); +} + +TEST(inval_create_ruleset_flags) { + const int last_flag = LANDLOCK_CREATE_RULESET_VERSION; + const int invalid_flag = last_flag << 1; + const struct landlock_ruleset_attr ruleset_attr = { + .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE, + }; + + ASSERT_EQ(-1, landlock_create_ruleset(NULL, 0, invalid_flag)); + ASSERT_EQ(EINVAL, errno); + + ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0, invalid_flag)); + ASSERT_EQ(EINVAL, errno); + + ASSERT_EQ(-1, landlock_create_ruleset(NULL, sizeof(ruleset_attr), + invalid_flag)); + ASSERT_EQ(EINVAL, errno); + + ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, + sizeof(ruleset_attr), invalid_flag)); + ASSERT_EQ(EINVAL, errno); +} + +TEST(empty_path_beneath_attr) { + const struct landlock_ruleset_attr ruleset_attr = { + .handled_access_fs = LANDLOCK_ACCESS_FS_EXECUTE, + }; + const int ruleset_fd = landlock_create_ruleset(&ruleset_attr, + sizeof(ruleset_attr), 0); + + ASSERT_LE(0, ruleset_fd); + + /* Similar to struct landlock_path_beneath_attr.parent_fd = 0 */ + ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, + NULL, 0)); + ASSERT_EQ(EFAULT, errno); + ASSERT_EQ(0, close(ruleset_fd)); +} + +TEST(inval_fd_enforce) { + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); + + ASSERT_EQ(-1, landlock_restrict_self(-1, 0)); + ASSERT_EQ(EBADF, errno); +} + +TEST(unpriv_enforce_without_no_new_privs) { + int err; + + drop_caps(_metadata); + err = landlock_restrict_self(-1, 0); + ASSERT_EQ(EPERM, errno); + ASSERT_EQ(err, -1); +} + +TEST(ruleset_fd_io) +{ + struct landlock_ruleset_attr ruleset_attr = { + .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE, + }; + int ruleset_fd; + char buf; + + drop_caps(_metadata); + ruleset_fd = landlock_create_ruleset(&ruleset_attr, + sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + + ASSERT_EQ(-1, write(ruleset_fd, ".", 1)); + ASSERT_EQ(EINVAL, errno); + ASSERT_EQ(-1, read(ruleset_fd, &buf, 1)); + ASSERT_EQ(EINVAL, errno); + + ASSERT_EQ(0, close(ruleset_fd)); +} + +/* Tests enforcement of a ruleset FD transferred through a UNIX socket. */ +TEST(ruleset_fd_transfer) +{ + struct landlock_ruleset_attr ruleset_attr = { + .handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR, + }; + struct landlock_path_beneath_attr path_beneath_attr = { + .allowed_access = LANDLOCK_ACCESS_FS_READ_DIR, + }; + int ruleset_fd_tx, dir_fd; + union { + /* Aligned ancillary data buffer. 
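The union with struct cmsghdr + * guarantees the alignment expected by the CMSG_* macros.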
*/ + char buf[CMSG_SPACE(sizeof(ruleset_fd_tx))]; + struct cmsghdr _align; + } cmsg_tx = {}; + char data_tx = '.'; + struct iovec io = { + .iov_base = &data_tx, + .iov_len = sizeof(data_tx), + }; + struct msghdr msg = { + .msg_iov = &io, + .msg_iovlen = 1, + .msg_control = &cmsg_tx.buf, + .msg_controllen = sizeof(cmsg_tx.buf), + }; + struct cmsghdr *cmsg; + int socket_fds[2]; + pid_t child; + int status; + + drop_caps(_metadata); + + /* Creates a test ruleset with a simple rule. */ + ruleset_fd_tx = landlock_create_ruleset(&ruleset_attr, + sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd_tx); + path_beneath_attr.parent_fd = open("/tmp", O_PATH | O_NOFOLLOW | + O_DIRECTORY | O_CLOEXEC); + ASSERT_LE(0, path_beneath_attr.parent_fd); + ASSERT_EQ(0, landlock_add_rule(ruleset_fd_tx, LANDLOCK_RULE_PATH_BENEATH, + &path_beneath_attr, 0)); + ASSERT_EQ(0, close(path_beneath_attr.parent_fd)); + + cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_NE(NULL, cmsg); + cmsg->cmsg_len = CMSG_LEN(sizeof(ruleset_fd_tx)); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + memcpy(CMSG_DATA(cmsg), &ruleset_fd_tx, sizeof(ruleset_fd_tx)); + + /* Sends the ruleset FD over a socketpair and then closes it. */ + ASSERT_EQ(0, socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, socket_fds)); + ASSERT_EQ(sizeof(data_tx), sendmsg(socket_fds[0], &msg, 0)); + ASSERT_EQ(0, close(socket_fds[0])); + ASSERT_EQ(0, close(ruleset_fd_tx)); + + child = fork(); + ASSERT_LE(0, child); + if (child == 0) { + int ruleset_fd_rx; + + *(char *)msg.msg_iov->iov_base = '\0'; + ASSERT_EQ(sizeof(data_tx), recvmsg(socket_fds[1], &msg, MSG_CMSG_CLOEXEC)); + ASSERT_EQ('.', *(char *)msg.msg_iov->iov_base); + ASSERT_EQ(0, close(socket_fds[1])); + cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(ruleset_fd_tx))); + memcpy(&ruleset_fd_rx, CMSG_DATA(cmsg), sizeof(ruleset_fd_tx)); + + /* Enforces the received ruleset on the child. */ + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); + ASSERT_EQ(0, landlock_restrict_self(ruleset_fd_rx, 0)); + ASSERT_EQ(0, close(ruleset_fd_rx)); + + /* Checks that the ruleset is enforced. */ + ASSERT_EQ(-1, open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC)); + ASSERT_EQ(EACCES, errno); + dir_fd = open("/tmp", O_RDONLY | O_DIRECTORY | O_CLOEXEC); + ASSERT_LE(0, dir_fd); + ASSERT_EQ(0, close(dir_fd)); + _exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE); + return; + } + + ASSERT_EQ(0, close(socket_fds[1])); + + /* Checks that the parent is unrestricted. 
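Only the child called + * landlock_restrict_self(), so both "/" and "/tmp" must still be + * readable from here.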
*/ + dir_fd = open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC); + ASSERT_LE(0, dir_fd); + ASSERT_EQ(0, close(dir_fd)); + dir_fd = open("/tmp", O_RDONLY | O_DIRECTORY | O_CLOEXEC); + ASSERT_LE(0, dir_fd); + ASSERT_EQ(0, close(dir_fd)); + + ASSERT_EQ(child, waitpid(child, &status, 0)); + ASSERT_EQ(1, WIFEXITED(status)); + ASSERT_EQ(EXIT_SUCCESS, WEXITSTATUS(status)); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h new file mode 100644 index 000000000000..20e2a9286d71 --- /dev/null +++ b/tools/testing/selftests/landlock/common.h @@ -0,0 +1,183 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Landlock test helpers + * + * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net> + * Copyright © 2019-2020 ANSSI + * Copyright © 2021 Microsoft Corporation + */ + +#include <errno.h> +#include <linux/landlock.h> +#include <sys/capability.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "../kselftest_harness.h" + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + +/* + * TEST_F_FORK() is useful when a test drops privileges but the corresponding + * FIXTURE_TEARDOWN() requires them (e.g. to remove files from a directory + * where write actions are denied). For convenience, FIXTURE_TEARDOWN() is + * also called when the test failed, but not when FIXTURE_SETUP() failed. For + * this to be possible, we must not call abort() but instead exit smoothly + * (hence the step print). + */ +#define TEST_F_FORK(fixture_name, test_name) \ + static void fixture_name##_##test_name##_child( \ + struct __test_metadata *_metadata, \ + FIXTURE_DATA(fixture_name) *self, \ + const FIXTURE_VARIANT(fixture_name) *variant); \ + TEST_F(fixture_name, test_name) \ + { \ + int status; \ + const pid_t child = fork(); \ + if (child < 0) \ + abort(); \ + if (child == 0) { \ + _metadata->no_print = 1; \ + fixture_name##_##test_name##_child(_metadata, self, variant); \ + if (_metadata->skip) \ + _exit(255); \ + if (_metadata->passed) \ + _exit(0); \ + _exit(_metadata->step); \ + } \ + if (child != waitpid(child, &status, 0)) \ + abort(); \ + if (WIFSIGNALED(status) || !WIFEXITED(status)) { \ + _metadata->passed = 0; \ + _metadata->step = 1; \ + return; \ + } \ + switch (WEXITSTATUS(status)) { \ + case 0: \ + _metadata->passed = 1; \ + break; \ + case 255: \ + _metadata->passed = 1; \ + _metadata->skip = 1; \ + break; \ + default: \ + _metadata->passed = 0; \ + _metadata->step = WEXITSTATUS(status); \ + break; \ + } \ + } \ + static void fixture_name##_##test_name##_child( \ + struct __test_metadata __attribute__((unused)) *_metadata, \ + FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \ + const FIXTURE_VARIANT(fixture_name) \ + __attribute__((unused)) *variant) + +#ifndef landlock_create_ruleset +static inline int landlock_create_ruleset( + const struct landlock_ruleset_attr *const attr, + const size_t size, const __u32 flags) +{ + return syscall(__NR_landlock_create_ruleset, attr, size, flags); +} +#endif + +#ifndef landlock_add_rule +static inline int landlock_add_rule(const int ruleset_fd, + const enum landlock_rule_type rule_type, + const void *const rule_attr, const __u32 flags) +{ + return syscall(__NR_landlock_add_rule, ruleset_fd, rule_type, + rule_attr, flags); +} +#endif + +#ifndef landlock_restrict_self +static inline int landlock_restrict_self(const int ruleset_fd, + const __u32 flags) +{ + return syscall(__NR_landlock_restrict_self, ruleset_fd, flags); 
+} +#endif + +static void _init_caps(struct __test_metadata *const _metadata, bool drop_all) +{ + cap_t cap_p; + /* Only these four capabilities are useful for the tests. */ + const cap_value_t caps[] = { + CAP_DAC_OVERRIDE, + CAP_MKNOD, + CAP_SYS_ADMIN, + CAP_SYS_CHROOT, + }; + + cap_p = cap_get_proc(); + EXPECT_NE(NULL, cap_p) { + TH_LOG("Failed to cap_get_proc: %s", strerror(errno)); + } + EXPECT_NE(-1, cap_clear(cap_p)) { + TH_LOG("Failed to cap_clear: %s", strerror(errno)); + } + if (!drop_all) { + EXPECT_NE(-1, cap_set_flag(cap_p, CAP_PERMITTED, + ARRAY_SIZE(caps), caps, CAP_SET)) { + TH_LOG("Failed to cap_set_flag: %s", strerror(errno)); + } + } + EXPECT_NE(-1, cap_set_proc(cap_p)) { + TH_LOG("Failed to cap_set_proc: %s", strerror(errno)); + } + EXPECT_NE(-1, cap_free(cap_p)) { + TH_LOG("Failed to cap_free: %s", strerror(errno)); + } +} + +/* We cannot put such helpers in a library because of kselftest_harness.h. */ +__attribute__((__unused__)) +static void disable_caps(struct __test_metadata *const _metadata) +{ + _init_caps(_metadata, false); +} + +__attribute__((__unused__)) +static void drop_caps(struct __test_metadata *const _metadata) +{ + _init_caps(_metadata, true); +} + +static void _effective_cap(struct __test_metadata *const _metadata, + const cap_value_t caps, const cap_flag_value_t value) +{ + cap_t cap_p; + + cap_p = cap_get_proc(); + EXPECT_NE(NULL, cap_p) { + TH_LOG("Failed to cap_get_proc: %s", strerror(errno)); + } + EXPECT_NE(-1, cap_set_flag(cap_p, CAP_EFFECTIVE, 1, &caps, value)) { + TH_LOG("Failed to cap_set_flag: %s", strerror(errno)); + } + EXPECT_NE(-1, cap_set_proc(cap_p)) { + TH_LOG("Failed to cap_set_proc: %s", strerror(errno)); + } + EXPECT_NE(-1, cap_free(cap_p)) { + TH_LOG("Failed to cap_free: %s", strerror(errno)); + } +} + +__attribute__((__unused__)) +static void set_cap(struct __test_metadata *const _metadata, + const cap_value_t caps) +{ + _effective_cap(_metadata, caps, CAP_SET); +} + +__attribute__((__unused__)) +static void clear_cap(struct __test_metadata *const _metadata, + const cap_value_t caps) +{ + _effective_cap(_metadata, caps, CAP_CLEAR); +} diff --git a/tools/testing/selftests/landlock/config b/tools/testing/selftests/landlock/config new file mode 100644 index 000000000000..0f0a65287bac --- /dev/null +++ b/tools/testing/selftests/landlock/config @@ -0,0 +1,7 @@ +CONFIG_OVERLAY_FS=y +CONFIG_SECURITY_LANDLOCK=y +CONFIG_SECURITY_PATH=y +CONFIG_SECURITY=y +CONFIG_SHMEM=y +CONFIG_TMPFS_XATTR=y +CONFIG_TMPFS=y diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c new file mode 100644 index 000000000000..10c9a1e4ebd9 --- /dev/null +++ b/tools/testing/selftests/landlock/fs_test.c @@ -0,0 +1,2791 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Landlock tests - Filesystem + * + * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net> + * Copyright © 2020 ANSSI + * Copyright © 2020-2021 Microsoft Corporation + */ + +#define _GNU_SOURCE +#include <fcntl.h> +#include <linux/landlock.h> +#include <sched.h> +#include <string.h> +#include <sys/capability.h> +#include <sys/mount.h> +#include <sys/prctl.h> +#include <sys/sendfile.h> +#include <sys/stat.h> +#include <sys/sysmacros.h> +#include <unistd.h> + +#include "common.h" + +#define TMP_DIR "tmp" +#define BINARY_PATH "./true" + +/* Paths (sibling number and depth) */ +static const char dir_s1d1[] = TMP_DIR "/s1d1"; +static const char file1_s1d1[] = TMP_DIR "/s1d1/f1"; +static const char file2_s1d1[] = TMP_DIR "/s1d1/f2"; +static const char dir_s1d2[] 
= TMP_DIR "/s1d1/s1d2"; +static const char file1_s1d2[] = TMP_DIR "/s1d1/s1d2/f1"; +static const char file2_s1d2[] = TMP_DIR "/s1d1/s1d2/f2"; +static const char dir_s1d3[] = TMP_DIR "/s1d1/s1d2/s1d3"; +static const char file1_s1d3[] = TMP_DIR "/s1d1/s1d2/s1d3/f1"; +static const char file2_s1d3[] = TMP_DIR "/s1d1/s1d2/s1d3/f2"; + +static const char dir_s2d1[] = TMP_DIR "/s2d1"; +static const char file1_s2d1[] = TMP_DIR "/s2d1/f1"; +static const char dir_s2d2[] = TMP_DIR "/s2d1/s2d2"; +static const char file1_s2d2[] = TMP_DIR "/s2d1/s2d2/f1"; +static const char dir_s2d3[] = TMP_DIR "/s2d1/s2d2/s2d3"; +static const char file1_s2d3[] = TMP_DIR "/s2d1/s2d2/s2d3/f1"; +static const char file2_s2d3[] = TMP_DIR "/s2d1/s2d2/s2d3/f2"; + +static const char dir_s3d1[] = TMP_DIR "/s3d1"; +/* dir_s3d2 is a mount point. */ +static const char dir_s3d2[] = TMP_DIR "/s3d1/s3d2"; +static const char dir_s3d3[] = TMP_DIR "/s3d1/s3d2/s3d3"; + +/* + * layout1 hierarchy: + * + * tmp + * ├── s1d1 + * │  ├── f1 + * │  ├── f2 + * │  └── s1d2 + * │  ├── f1 + * │  ├── f2 + * │  └── s1d3 + * │  ├── f1 + * │  └── f2 + * ├── s2d1 + * │  ├── f1 + * │  └── s2d2 + * │  ├── f1 + * │  └── s2d3 + * │  ├── f1 + * │  └── f2 + * └── s3d1 + * └── s3d2 + * └── s3d3 + */ + +static void mkdir_parents(struct __test_metadata *const _metadata, + const char *const path) +{ + char *walker; + const char *parent; + int i, err; + + ASSERT_NE(path[0], '\0'); + walker = strdup(path); + ASSERT_NE(NULL, walker); + parent = walker; + for (i = 1; walker[i]; i++) { + if (walker[i] != '/') + continue; + walker[i] = '\0'; + err = mkdir(parent, 0700); + ASSERT_FALSE(err && errno != EEXIST) { + TH_LOG("Failed to create directory \"%s\": %s", + parent, strerror(errno)); + } + walker[i] = '/'; + } + free(walker); +} + +static void create_directory(struct __test_metadata *const _metadata, + const char *const path) +{ + mkdir_parents(_metadata, path); + ASSERT_EQ(0, mkdir(path, 0700)) { + TH_LOG("Failed to create directory \"%s\": %s", path, + strerror(errno)); + } +} + +static void create_file(struct __test_metadata *const _metadata, + const char *const path) +{ + mkdir_parents(_metadata, path); + ASSERT_EQ(0, mknod(path, S_IFREG | 0700, 0)) { + TH_LOG("Failed to create file \"%s\": %s", path, + strerror(errno)); + } +} + +static int remove_path(const char *const path) +{ + char *walker; + int i, ret, err = 0; + + walker = strdup(path); + if (!walker) { + err = ENOMEM; + goto out; + } + if (unlink(path) && rmdir(path)) { + if (errno != ENOENT) + err = errno; + goto out; + } + for (i = strlen(walker); i > 0; i--) { + if (walker[i] != '/') + continue; + walker[i] = '\0'; + ret = rmdir(walker); + if (ret) { + if (errno != ENOTEMPTY && errno != EBUSY) + err = errno; + goto out; + } + if (strcmp(walker, TMP_DIR) == 0) + goto out; + } + +out: + free(walker); + return err; +} + +static void prepare_layout(struct __test_metadata *const _metadata) +{ + disable_caps(_metadata); + umask(0077); + create_directory(_metadata, TMP_DIR); + + /* + * Do not pollute the rest of the system: creates a private mount point + * for tests relying on pivot_root(2) and move_mount(2). 
+ */ + set_cap(_metadata, CAP_SYS_ADMIN); + ASSERT_EQ(0, unshare(CLONE_NEWNS)); + ASSERT_EQ(0, mount("tmp", TMP_DIR, "tmpfs", 0, "size=4m,mode=700")); + ASSERT_EQ(0, mount(NULL, TMP_DIR, NULL, MS_PRIVATE | MS_REC, NULL)); + clear_cap(_metadata, CAP_SYS_ADMIN); +} + +static void cleanup_layout(struct __test_metadata *const _metadata) +{ + set_cap(_metadata, CAP_SYS_ADMIN); + EXPECT_EQ(0, umount(TMP_DIR)); + clear_cap(_metadata, CAP_SYS_ADMIN); + EXPECT_EQ(0, remove_path(TMP_DIR)); +} + +static void create_layout1(struct __test_metadata *const _metadata) +{ + create_file(_metadata, file1_s1d1); + create_file(_metadata, file1_s1d2); + create_file(_metadata, file1_s1d3); + create_file(_metadata, file2_s1d1); + create_file(_metadata, file2_s1d2); + create_file(_metadata, file2_s1d3); + + create_file(_metadata, file1_s2d1); + create_file(_metadata, file1_s2d2); + create_file(_metadata, file1_s2d3); + create_file(_metadata, file2_s2d3); + + create_directory(_metadata, dir_s3d2); + set_cap(_metadata, CAP_SYS_ADMIN); + ASSERT_EQ(0, mount("tmp", dir_s3d2, "tmpfs", 0, "size=4m,mode=700")); + clear_cap(_metadata, CAP_SYS_ADMIN); + + ASSERT_EQ(0, mkdir(dir_s3d3, 0700)); +} + +static void remove_layout1(struct __test_metadata *const _metadata) +{ + EXPECT_EQ(0, remove_path(file2_s1d3)); + EXPECT_EQ(0, remove_path(file2_s1d2)); + EXPECT_EQ(0, remove_path(file2_s1d1)); + EXPECT_EQ(0, remove_path(file1_s1d3)); + EXPECT_EQ(0, remove_path(file1_s1d2)); + EXPECT_EQ(0, remove_path(file1_s1d1)); + + EXPECT_EQ(0, remove_path(file2_s2d3)); + EXPECT_EQ(0, remove_path(file1_s2d3)); + EXPECT_EQ(0, remove_path(file1_s2d2)); + EXPECT_EQ(0, remove_path(file1_s2d1)); + + EXPECT_EQ(0, remove_path(dir_s3d3)); + set_cap(_metadata, CAP_SYS_ADMIN); + umount(dir_s3d2); + clear_cap(_metadata, CAP_SYS_ADMIN); + EXPECT_EQ(0, remove_path(dir_s3d2)); +} + +FIXTURE(layout1) { +}; + +FIXTURE_SETUP(layout1) +{ + prepare_layout(_metadata); + + create_layout1(_metadata); +} + +FIXTURE_TEARDOWN(layout1) +{ + remove_layout1(_metadata); + + cleanup_layout(_metadata); +} + +/* + * This helper makes it possible to use the ASSERT_* macros and to print the + * line number of the test caller. + */ +static int test_open_rel(const int dirfd, const char *const path, const int flags) +{ + int fd; + + /* Works with files and directories. */ + fd = openat(dirfd, path, flags | O_CLOEXEC); + if (fd < 0) + return errno; + /* + * Mixing error codes from close(2) and open(2) should not lead to any + * (access type) confusion for this test. 
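(close(2) is not expected to fail here, and if it does, its errno + * is still meaningful to the caller.)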
*/ + if (close(fd) != 0) + return errno; + return 0; +} + +static int test_open(const char *const path, const int flags) +{ + return test_open_rel(AT_FDCWD, path, flags); +} + +TEST_F_FORK(layout1, no_restriction) +{ + ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY)); + ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY)); + ASSERT_EQ(0, test_open(file2_s1d1, O_RDONLY)); + ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY)); + ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY)); + ASSERT_EQ(0, test_open(file2_s1d2, O_RDONLY)); + ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY)); + ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY)); + + ASSERT_EQ(0, test_open(dir_s2d1, O_RDONLY)); + ASSERT_EQ(0, test_open(file1_s2d1, O_RDONLY)); + ASSERT_EQ(0, test_open(dir_s2d2, O_RDONLY)); + ASSERT_EQ(0, test_open(file1_s2d2, O_RDONLY)); + ASSERT_EQ(0, test_open(dir_s2d3, O_RDONLY)); + ASSERT_EQ(0, test_open(file1_s2d3, O_RDONLY)); + + ASSERT_EQ(0, test_open(dir_s3d1, O_RDONLY)); + ASSERT_EQ(0, test_open(dir_s3d2, O_RDONLY)); + ASSERT_EQ(0, test_open(dir_s3d3, O_RDONLY)); +} + +TEST_F_FORK(layout1, inval) +{ + struct landlock_path_beneath_attr path_beneath = { + .allowed_access = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE, + .parent_fd = -1, + }; + struct landlock_ruleset_attr ruleset_attr = { + .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE, + }; + int ruleset_fd; + + path_beneath.parent_fd = open(dir_s1d2, O_PATH | O_DIRECTORY | + O_CLOEXEC); + ASSERT_LE(0, path_beneath.parent_fd); + + ruleset_fd = open(dir_s1d1, O_PATH | O_DIRECTORY | O_CLOEXEC); + ASSERT_LE(0, ruleset_fd); + ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, + &path_beneath, 0)); + /* Returns EBADF because ruleset_fd is not a landlock-ruleset FD. */ + ASSERT_EQ(EBADF, errno); + ASSERT_EQ(0, close(ruleset_fd)); + + ruleset_fd = open(dir_s1d1, O_DIRECTORY | O_CLOEXEC); + ASSERT_LE(0, ruleset_fd); + ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, + &path_beneath, 0)); + /* Returns EBADFD because ruleset_fd is not a valid ruleset. */ + ASSERT_EQ(EBADFD, errno); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Gets a real ruleset. */ + ruleset_fd = landlock_create_ruleset(&ruleset_attr, + sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, + &path_beneath, 0)); + ASSERT_EQ(0, close(path_beneath.parent_fd)); + + /* Tests without O_PATH. */ + path_beneath.parent_fd = open(dir_s1d2, O_DIRECTORY | O_CLOEXEC); + ASSERT_LE(0, path_beneath.parent_fd); + ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, + &path_beneath, 0)); + ASSERT_EQ(0, close(path_beneath.parent_fd)); + + /* Tests with a ruleset FD. */ + path_beneath.parent_fd = ruleset_fd; + ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, + &path_beneath, 0)); + ASSERT_EQ(EBADFD, errno); + + /* Checks unhandled allowed_access. */ + path_beneath.parent_fd = open(dir_s1d2, O_PATH | O_DIRECTORY | + O_CLOEXEC); + ASSERT_LE(0, path_beneath.parent_fd); + + /* Test with a legitimate but unhandled value. */ + path_beneath.allowed_access |= LANDLOCK_ACCESS_FS_EXECUTE; + ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, + &path_beneath, 0)); + ASSERT_EQ(EINVAL, errno); + path_beneath.allowed_access &= ~LANDLOCK_ACCESS_FS_EXECUTE; + + /* Test with unknown (64-bit) value. 
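Bit 60 is not a defined + * LANDLOCK_ACCESS_FS_* right, so the kernel must reject it with + * EINVAL.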
+ path_beneath.allowed_access |= (1ULL << 60);
+ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath, 0));
+ ASSERT_EQ(EINVAL, errno);
+ path_beneath.allowed_access &= ~(1ULL << 60);
+
+ /* Tests with no access. */
+ path_beneath.allowed_access = 0;
+ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath, 0));
+ ASSERT_EQ(ENOMSG, errno);
+
+ ASSERT_EQ(0, close(path_beneath.parent_fd));
+
+ /* Enforces the ruleset. */
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+ ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
+
+ ASSERT_EQ(0, close(ruleset_fd));
+}
+
+#define ACCESS_FILE ( \
+ LANDLOCK_ACCESS_FS_EXECUTE | \
+ LANDLOCK_ACCESS_FS_WRITE_FILE | \
+ LANDLOCK_ACCESS_FS_READ_FILE)
+
+#define ACCESS_LAST LANDLOCK_ACCESS_FS_MAKE_SYM
+
+#define ACCESS_ALL ( \
+ ACCESS_FILE | \
+ LANDLOCK_ACCESS_FS_READ_DIR | \
+ LANDLOCK_ACCESS_FS_REMOVE_DIR | \
+ LANDLOCK_ACCESS_FS_REMOVE_FILE | \
+ LANDLOCK_ACCESS_FS_MAKE_CHAR | \
+ LANDLOCK_ACCESS_FS_MAKE_DIR | \
+ LANDLOCK_ACCESS_FS_MAKE_REG | \
+ LANDLOCK_ACCESS_FS_MAKE_SOCK | \
+ LANDLOCK_ACCESS_FS_MAKE_FIFO | \
+ LANDLOCK_ACCESS_FS_MAKE_BLOCK | \
+ ACCESS_LAST)
+
+TEST_F_FORK(layout1, file_access_rights)
+{
+ __u64 access;
+ int err;
+ struct landlock_path_beneath_attr path_beneath = {};
+ struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = ACCESS_ALL,
+ };
+ const int ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Tests access rights for files. */
+ path_beneath.parent_fd = open(file1_s1d2, O_PATH | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath.parent_fd);
+ for (access = 1; access <= ACCESS_LAST; access <<= 1) {
+ path_beneath.allowed_access = access;
+ err = landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath, 0);
+ if ((access | ACCESS_FILE) == ACCESS_FILE) {
+ ASSERT_EQ(0, err);
+ } else {
+ ASSERT_EQ(-1, err);
+ ASSERT_EQ(EINVAL, errno);
+ }
+ }
+ ASSERT_EQ(0, close(path_beneath.parent_fd));
+}
+
+static void add_path_beneath(struct __test_metadata *const _metadata,
+ const int ruleset_fd, const __u64 allowed_access,
+ const char *const path)
+{
+ struct landlock_path_beneath_attr path_beneath = {
+ .allowed_access = allowed_access,
+ };
+
+ path_beneath.parent_fd = open(path, O_PATH | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath.parent_fd) {
+ TH_LOG("Failed to open directory \"%s\": %s", path,
+ strerror(errno));
+ }
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath, 0)) {
+ TH_LOG("Failed to update the ruleset with \"%s\": %s", path,
+ strerror(errno));
+ }
+ ASSERT_EQ(0, close(path_beneath.parent_fd));
+}
+
+struct rule {
+ const char *path;
+ __u64 access;
+};
+
+#define ACCESS_RO ( \
+ LANDLOCK_ACCESS_FS_READ_FILE | \
+ LANDLOCK_ACCESS_FS_READ_DIR)
+
+#define ACCESS_RW ( \
+ ACCESS_RO | \
+ LANDLOCK_ACCESS_FS_WRITE_FILE)
+
+static int create_ruleset(struct __test_metadata *const _metadata,
+ const __u64 handled_access_fs, const struct rule rules[])
+{
+ int ruleset_fd, i;
+ struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = handled_access_fs,
+ };
+
+ ASSERT_NE(NULL, rules) {
+ TH_LOG("No rule list");
+ }
+ ASSERT_NE(NULL, rules[0].path) {
+ TH_LOG("Empty rule list");
+ }
+
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd) {
+ TH_LOG("Failed to create a ruleset: %s", strerror(errno));
+ }
+
+ for (i = 0; rules[i].path; i++) {
+ add_path_beneath(_metadata, ruleset_fd, rules[i].access,
+ rules[i].path);
+ }
+ return ruleset_fd;
+}
+
+static void enforce_ruleset(struct __test_metadata *const _metadata,
+ const int ruleset_fd)
+{
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+ ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0)) {
+ TH_LOG("Failed to enforce ruleset: %s", strerror(errno));
+ }
+}
+
+TEST_F_FORK(layout1, proc_nsfs)
+{
+ const struct rule rules[] = {
+ {
+ .path = "/dev/null",
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {}
+ };
+ struct landlock_path_beneath_attr path_beneath;
+ const int ruleset_fd = create_ruleset(_metadata, rules[0].access |
+ LANDLOCK_ACCESS_FS_READ_DIR, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(0, test_open("/proc/self/ns/mnt", O_RDONLY));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+
+ ASSERT_EQ(EACCES, test_open("/", O_RDONLY));
+ ASSERT_EQ(EACCES, test_open("/dev", O_RDONLY));
+ ASSERT_EQ(0, test_open("/dev/null", O_RDONLY));
+ ASSERT_EQ(EACCES, test_open("/dev/full", O_RDONLY));
+
+ ASSERT_EQ(EACCES, test_open("/proc", O_RDONLY));
+ ASSERT_EQ(EACCES, test_open("/proc/self", O_RDONLY));
+ ASSERT_EQ(EACCES, test_open("/proc/self/ns", O_RDONLY));
+ /*
+ * Because nsfs is an internal filesystem, /proc/self/ns/mnt is a
+ * disconnected path. Such a path cannot be identified and must
+ * therefore be allowed.
+ */
+ ASSERT_EQ(0, test_open("/proc/self/ns/mnt", O_RDONLY));
+
+ /*
+ * Checks that it is not possible to add nsfs-like filesystem
+ * references to a ruleset.
+ */
+ path_beneath.allowed_access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE;
+ path_beneath.parent_fd = open("/proc/self/ns/mnt", O_PATH | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath.parent_fd);
+ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath, 0));
+ ASSERT_EQ(EBADFD, errno);
+ ASSERT_EQ(0, close(path_beneath.parent_fd));
+}
+
+TEST_F_FORK(layout1, unpriv)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d2,
+ .access = ACCESS_RO,
+ },
+ {}
+ };
+ int ruleset_fd;
+
+ drop_caps(_metadata);
+
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RO, rules);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(-1, landlock_restrict_self(ruleset_fd, 0));
+ ASSERT_EQ(EPERM, errno);
+
+ /* enforce_ruleset() calls prctl(no_new_privs). */
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+}
+
+TEST_F_FORK(layout1, effective_access)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d2,
+ .access = ACCESS_RO,
+ },
+ {
+ .path = file1_s2d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {}
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+ char buf;
+ int reg_fd;
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Tests on a directory. */
+ ASSERT_EQ(EACCES, test_open("/", O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+
+ /* Tests on a file. */
+ ASSERT_EQ(EACCES, test_open(dir_s2d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s2d2, O_RDONLY));
+
+ /*
+ * Checks effective read and write actions.
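+ * The rule on file1_s2d2 grants both LANDLOCK_ACCESS_FS_READ_FILE and
+ * LANDLOCK_ACCESS_FS_WRITE_FILE, so the O_RDWR open and the actual
+ * read(2)/write(2) calls below must all succeed.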
*/ + reg_fd = open(file1_s2d2, O_RDWR | O_CLOEXEC); + ASSERT_LE(0, reg_fd); + ASSERT_EQ(1, write(reg_fd, ".", 1)); + ASSERT_LE(0, lseek(reg_fd, 0, SEEK_SET)); + ASSERT_EQ(1, read(reg_fd, &buf, 1)); + ASSERT_EQ('.', buf); + ASSERT_EQ(0, close(reg_fd)); + + /* Just in case, double-checks effective actions. */ + reg_fd = open(file1_s2d2, O_RDONLY | O_CLOEXEC); + ASSERT_LE(0, reg_fd); + ASSERT_EQ(-1, write(reg_fd, &buf, 1)); + ASSERT_EQ(EBADF, errno); + ASSERT_EQ(0, close(reg_fd)); +} + +TEST_F_FORK(layout1, unhandled_access) +{ + const struct rule rules[] = { + { + .path = dir_s1d2, + .access = ACCESS_RO, + }, + {} + }; + /* Here, we only handle read accesses, not write accesses. */ + const int ruleset_fd = create_ruleset(_metadata, ACCESS_RO, rules); + + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* + * Because the policy does not handle LANDLOCK_ACCESS_FS_WRITE_FILE, + * opening for write-only should be allowed, but not read-write. + */ + ASSERT_EQ(0, test_open(file1_s1d1, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR)); + + ASSERT_EQ(0, test_open(file1_s1d2, O_WRONLY)); + ASSERT_EQ(0, test_open(file1_s1d2, O_RDWR)); +} + +TEST_F_FORK(layout1, ruleset_overlap) +{ + const struct rule rules[] = { + /* These rules should be ORed among them. */ + { + .path = dir_s1d2, + .access = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE, + }, + { + .path = dir_s1d2, + .access = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_READ_DIR, + }, + {} + }; + const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); + + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks s1d1 hierarchy. */ + ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR)); + ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY)); + + /* Checks s1d2 hierarchy. */ + ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY)); + ASSERT_EQ(0, test_open(file1_s1d2, O_WRONLY)); + ASSERT_EQ(0, test_open(file1_s1d2, O_RDWR)); + ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY)); + + /* Checks s1d3 hierarchy. */ + ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY)); + ASSERT_EQ(0, test_open(file1_s1d3, O_WRONLY)); + ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR)); + ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY)); +} + +TEST_F_FORK(layout1, non_overlapping_accesses) +{ + const struct rule layer1[] = { + { + .path = dir_s1d2, + .access = LANDLOCK_ACCESS_FS_MAKE_REG, + }, + {} + }; + const struct rule layer2[] = { + { + .path = dir_s1d3, + .access = LANDLOCK_ACCESS_FS_REMOVE_FILE, + }, + {} + }; + int ruleset_fd; + + ASSERT_EQ(0, unlink(file1_s1d1)); + ASSERT_EQ(0, unlink(file1_s1d2)); + + ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_MAKE_REG, + layer1); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + ASSERT_EQ(-1, mknod(file1_s1d1, S_IFREG | 0700, 0)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(0, mknod(file1_s1d2, S_IFREG | 0700, 0)); + ASSERT_EQ(0, unlink(file1_s1d2)); + + ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_REMOVE_FILE, + layer2); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Unchanged accesses for file creation. 
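+ * (The layer2 ruleset only handles LANDLOCK_ACCESS_FS_REMOVE_FILE, so
+ * the LANDLOCK_ACCESS_FS_MAKE_REG restrictions from layer1 still apply
+ * as before.)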
+ */
+ ASSERT_EQ(-1, mknod(file1_s1d1, S_IFREG | 0700, 0));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(0, mknod(file1_s1d2, S_IFREG | 0700, 0));
+
+ /* Checks file removal. */
+ ASSERT_EQ(-1, unlink(file1_s1d2));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(0, unlink(file1_s1d3));
+}
+
+TEST_F_FORK(layout1, interleaved_masked_accesses)
+{
+ /*
+ * Checks overly restrictive rules:
+ * layer 1: allows R   s1d1/s1d2/s1d3/file1
+ * layer 2: allows RW  s1d1/s1d2/s1d3
+ *          allows W   s1d1/s1d2
+ *          denies R   s1d1/s1d2
+ * layer 3: allows R   s1d1
+ * layer 4: allows R   s1d1/s1d2
+ *          denies W   s1d1/s1d2
+ * layer 5: allows R   s1d1/s1d2
+ * layer 6: allows X   ----
+ * layer 7: allows W   s1d1/s1d2
+ *          denies R   s1d1/s1d2
+ */
+ const struct rule layer1_read[] = {
+ /* Allows read access to file1_s1d3 with the first layer. */
+ {
+ .path = file1_s1d3,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {}
+ };
+ /* First rule with write restrictions. */
+ const struct rule layer2_read_write[] = {
+ /* Starts by granting read-write access via its parent directory... */
+ {
+ .path = dir_s1d3,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ /* ...but also denies read access via its grandparent directory. */
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {}
+ };
+ const struct rule layer3_read[] = {
+ /* Allows read access via its great-grandparent directory. */
+ {
+ .path = dir_s1d1,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {}
+ };
+ const struct rule layer4_read_write[] = {
+ /*
+ * Tries to confuse the deny logic by denying write (but not
+ * read) access via the grandparent directory.
+ */
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {}
+ };
+ const struct rule layer5_read[] = {
+ /*
+ * Tries to override layer2's read denial by explicitly
+ * allowing read access via file1_s1d3's grandparent.
+ */
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {}
+ };
+ const struct rule layer6_execute[] = {
+ /*
+ * Restricts an unrelated file hierarchy with a new
+ * (non-overlapping) access type.
+ */
+ {
+ .path = dir_s2d1,
+ .access = LANDLOCK_ACCESS_FS_EXECUTE,
+ },
+ {}
+ };
+ const struct rule layer7_read_write[] = {
+ /*
+ * Finally, denies read access to file1_s1d3 via its
+ * grandparent.
+ */
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {}
+ };
+ int ruleset_fd;
+
+ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE,
+ layer1_read);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks that read access is granted for file1_s1d3 with layer 1. */
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
+ ASSERT_EQ(0, test_open(file2_s1d3, O_WRONLY));
+
+ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE, layer2_read_write);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks that previous access rights are unchanged with layer 2. */
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
+ ASSERT_EQ(0, test_open(file2_s1d3, O_WRONLY));
+
+ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE,
+ layer3_read);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /*
+ * Checks that previous access rights are unchanged with layer 3.
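+ * (Enforcing an additional layer can only reduce access: layer3 only
+ * allows read via dir_s1d1, and an allow rule in a new layer never
+ * grants back what a previous layer already denied.)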
*/ + ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR)); + ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY)); + ASSERT_EQ(0, test_open(file2_s1d3, O_WRONLY)); + + /* This time, denies write access for the file hierarchy. */ + ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE, layer4_read_write); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* + * Checks that the only change with layer 4 is that write access is + * denied. + */ + ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(file2_s1d3, O_WRONLY)); + + ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE, + layer5_read); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks that previous access rights are unchanged with layer 5. */ + ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(file2_s1d3, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY)); + + ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_EXECUTE, + layer6_execute); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks that previous access rights are unchanged with layer 6. */ + ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(file2_s1d3, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY)); + + ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE, layer7_read_write); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks read access is now denied with layer 7. */ + ASSERT_EQ(EACCES, test_open(file1_s1d3, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(file2_s1d3, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY)); +} + +TEST_F_FORK(layout1, inherit_subset) +{ + const struct rule rules[] = { + { + .path = dir_s1d2, + .access = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_READ_DIR, + }, + {} + }; + const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); + + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + + ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY)); + + /* Write access is forbidden. */ + ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY)); + /* Readdir access is allowed. */ + ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY)); + + /* Write access is forbidden. */ + ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY)); + /* Readdir access is allowed. */ + ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY)); + + /* + * Tests shared rule extension: the following rules should not grant + * any new access, only remove some. Once enforced, these rules are + * ANDed with the previous ones. + */ + add_path_beneath(_metadata, ruleset_fd, LANDLOCK_ACCESS_FS_WRITE_FILE, + dir_s1d2); + /* + * According to ruleset_fd, dir_s1d2 should now have the + * LANDLOCK_ACCESS_FS_READ_FILE and LANDLOCK_ACCESS_FS_WRITE_FILE + * access rights (even if this directory is opened a second time). 
+ * However, when enforcing this updated ruleset, the ruleset tied to
+ * the current process (i.e. its domain) will still only have
+ * dir_s1d2 with the LANDLOCK_ACCESS_FS_READ_FILE and
+ * LANDLOCK_ACCESS_FS_READ_DIR accesses, but
+ * LANDLOCK_ACCESS_FS_WRITE_FILE must not be allowed because it would
+ * be a privilege escalation.
+ */
+ enforce_ruleset(_metadata, ruleset_fd);
+
+ /* Same tests and results as above. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ /* It is still forbidden to write in file1_s1d2. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
+ /* Readdir access is still allowed. */
+ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
+
+ /* It is still forbidden to write in file1_s1d3. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
+ /* Readdir access is still allowed. */
+ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
+
+ /*
+ * Tries to get more privileges by adding new access rights to the
+ * parent directory: dir_s1d1.
+ */
+ add_path_beneath(_metadata, ruleset_fd, ACCESS_RW, dir_s1d1);
+ enforce_ruleset(_metadata, ruleset_fd);
+
+ /* Same tests and results as above. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ /* It is still forbidden to write in file1_s1d2. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
+ /* Readdir access is still allowed. */
+ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
+
+ /* It is still forbidden to write in file1_s1d3. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
+ /* Readdir access is still allowed. */
+ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
+
+ /*
+ * Now, dir_s1d3 gets a new rule tied to it, only allowing
+ * LANDLOCK_ACCESS_FS_WRITE_FILE. The (kernel internal) difference is
+ * that there was no rule tied to it before.
+ */
+ add_path_beneath(_metadata, ruleset_fd, LANDLOCK_ACCESS_FS_WRITE_FILE,
+ dir_s1d3);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /*
+ * Same tests and results as above: even though dir_s1d3 now has its
+ * own rule (which only allows LANDLOCK_ACCESS_FS_WRITE_FILE), readdir
+ * of dir_s1d3 is still allowed because, within a single layer, this
+ * rule is ORed with the one inherited from dir_s1d2.
+ */
+
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ /* It is still forbidden to write in file1_s1d2. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
+ /* Readdir access is still allowed. */
+ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
+
+ /* It is still forbidden to write in file1_s1d3. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
+ /*
+ * Readdir of dir_s1d3 is still allowed because of the OR policy inside
+ * the same layer.
+ */
+ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
+}
+
+TEST_F_FORK(layout1, inherit_superset)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d3,
+ .access = ACCESS_RO,
+ },
+ {}
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+
+ /* Readdir access is denied for dir_s1d2. */
+ ASSERT_EQ(EACCES, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
+ /* Readdir access is allowed for dir_s1d3. */
+ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
+ /* File access is allowed for file1_s1d3. */
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+
+ /* Now dir_s1d2, parent of dir_s1d3, gets a new rule tied to it. */
+ add_path_beneath(_metadata, ruleset_fd, LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_READ_DIR, dir_s1d2);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Readdir access is still denied for dir_s1d2. */
+ ASSERT_EQ(EACCES, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
+ /* Readdir access is still allowed for dir_s1d3. */
+ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
+ /* File access is still allowed for file1_s1d3. */
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+}
+
+TEST_F_FORK(layout1, max_layers)
+{
+ int i, err;
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d2,
+ .access = ACCESS_RO,
+ },
+ {}
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ for (i = 0; i < 64; i++)
+ enforce_ruleset(_metadata, ruleset_fd);
+
+ for (i = 0; i < 2; i++) {
+ err = landlock_restrict_self(ruleset_fd, 0);
+ ASSERT_EQ(-1, err);
+ ASSERT_EQ(E2BIG, errno);
+ }
+ ASSERT_EQ(0, close(ruleset_fd));
+}
+
+TEST_F_FORK(layout1, empty_or_same_ruleset)
+{
+ struct landlock_ruleset_attr ruleset_attr = {};
+ int ruleset_fd;
+
+ /* Tests empty handled_access_fs. */
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_EQ(-1, ruleset_fd);
+ ASSERT_EQ(ENOMSG, errno);
+
+ /* Enforces a policy that denies read access to all files. */
+ ruleset_attr.handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE;
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY));
+
+ /* Nests a policy that denies read access to all directories. */
+ ruleset_attr.handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR;
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY));
+
+ /* Enforces a second time with the same ruleset. */
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+}
+
+TEST_F_FORK(layout1, rule_on_mountpoint)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d1,
+ .access = ACCESS_RO,
+ },
+ {
+ /* dir_s3d2 is a mount point. */
+ .path = dir_s3d2,
+ .access = ACCESS_RO,
+ },
+ {}
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY));
+
+ ASSERT_EQ(EACCES, test_open(dir_s2d1, O_RDONLY));
+
+ ASSERT_EQ(EACCES, test_open(dir_s3d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s3d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s3d3, O_RDONLY));
+}
+
+TEST_F_FORK(layout1, rule_over_mountpoint)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d1,
+ .access = ACCESS_RO,
+ },
+ {
+ /* dir_s3d2 is a mount point. */
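+ /*
+ * This time the rule is not set on the mount point itself but on
+ * dir_s3d1, its parent, to check that the granted access rights
+ * propagate across the mount point.
+ */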
+ .path = dir_s3d1,
+ .access = ACCESS_RO,
+ },
+ {}
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY));
+
+ ASSERT_EQ(EACCES, test_open(dir_s2d1, O_RDONLY));
+
+ ASSERT_EQ(0, test_open(dir_s3d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s3d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s3d3, O_RDONLY));
+}
+
+/*
+ * This test verifies that we can apply a Landlock rule on the root directory
+ * (which might require special handling).
+ */
+TEST_F_FORK(layout1, rule_over_root_allow_then_deny)
+{
+ struct rule rules[] = {
+ {
+ .path = "/",
+ .access = ACCESS_RO,
+ },
+ {}
+ };
+ int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks allowed access. */
+ ASSERT_EQ(0, test_open("/", O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY));
+
+ rules[0].access = LANDLOCK_ACCESS_FS_READ_FILE;
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks denied access (on a directory). */
+ ASSERT_EQ(EACCES, test_open("/", O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY));
+}
+
+TEST_F_FORK(layout1, rule_over_root_deny)
+{
+ const struct rule rules[] = {
+ {
+ .path = "/",
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {}
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks denied access (on a directory).
*/ + ASSERT_EQ(EACCES, test_open("/", O_RDONLY)); + ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY)); +} + +TEST_F_FORK(layout1, rule_inside_mount_ns) +{ + const struct rule rules[] = { + { + .path = "s3d3", + .access = ACCESS_RO, + }, + {} + }; + int ruleset_fd; + + set_cap(_metadata, CAP_SYS_ADMIN); + ASSERT_EQ(0, syscall(SYS_pivot_root, dir_s3d2, dir_s3d3)) { + TH_LOG("Failed to pivot root: %s", strerror(errno)); + }; + ASSERT_EQ(0, chdir("/")); + clear_cap(_metadata, CAP_SYS_ADMIN); + + ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + ASSERT_EQ(0, test_open("s3d3", O_RDONLY)); + ASSERT_EQ(EACCES, test_open("/", O_RDONLY)); +} + +TEST_F_FORK(layout1, mount_and_pivot) +{ + const struct rule rules[] = { + { + .path = dir_s3d2, + .access = ACCESS_RO, + }, + {} + }; + const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); + + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + set_cap(_metadata, CAP_SYS_ADMIN); + ASSERT_EQ(-1, mount(NULL, dir_s3d2, NULL, MS_RDONLY, NULL)); + ASSERT_EQ(EPERM, errno); + ASSERT_EQ(-1, syscall(SYS_pivot_root, dir_s3d2, dir_s3d3)); + ASSERT_EQ(EPERM, errno); + clear_cap(_metadata, CAP_SYS_ADMIN); +} + +TEST_F_FORK(layout1, move_mount) +{ + const struct rule rules[] = { + { + .path = dir_s3d2, + .access = ACCESS_RO, + }, + {} + }; + const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); + + ASSERT_LE(0, ruleset_fd); + + set_cap(_metadata, CAP_SYS_ADMIN); + ASSERT_EQ(0, syscall(SYS_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD, + dir_s1d2, 0)) { + TH_LOG("Failed to move mount: %s", strerror(errno)); + } + + ASSERT_EQ(0, syscall(SYS_move_mount, AT_FDCWD, dir_s1d2, AT_FDCWD, + dir_s3d2, 0)); + clear_cap(_metadata, CAP_SYS_ADMIN); + + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + set_cap(_metadata, CAP_SYS_ADMIN); + ASSERT_EQ(-1, syscall(SYS_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD, + dir_s1d2, 0)); + ASSERT_EQ(EPERM, errno); + clear_cap(_metadata, CAP_SYS_ADMIN); +} + +TEST_F_FORK(layout1, release_inodes) +{ + const struct rule rules[] = { + { + .path = dir_s1d1, + .access = ACCESS_RO, + }, + { + .path = dir_s3d2, + .access = ACCESS_RO, + }, + { + .path = dir_s3d3, + .access = ACCESS_RO, + }, + {} + }; + const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); + + ASSERT_LE(0, ruleset_fd); + /* Unmount a file hierarchy while it is being used by a ruleset. */ + set_cap(_metadata, CAP_SYS_ADMIN); + ASSERT_EQ(0, umount(dir_s3d2)); + clear_cap(_metadata, CAP_SYS_ADMIN); + + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(dir_s3d2, O_RDONLY)); + /* This dir_s3d3 would not be allowed and does not exist anyway. */ + ASSERT_EQ(ENOENT, test_open(dir_s3d3, O_RDONLY)); +} + +enum relative_access { + REL_OPEN, + REL_CHDIR, + REL_CHROOT_ONLY, + REL_CHROOT_CHDIR, +}; + +static void test_relative_path(struct __test_metadata *const _metadata, + const enum relative_access rel) +{ + /* + * Common layer to check that chroot doesn't ignore it (i.e. a chroot + * is not a disconnected root directory). 
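+ * Unlike the nsfs case above, a chrooted path can still be walked up
+ * to the real root of the mount namespace, so the rules on TMP_DIR
+ * are expected to keep applying.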
+ */ + const struct rule layer1_base[] = { + { + .path = TMP_DIR, + .access = ACCESS_RO, + }, + {} + }; + const struct rule layer2_subs[] = { + { + .path = dir_s1d2, + .access = ACCESS_RO, + }, + { + .path = dir_s2d2, + .access = ACCESS_RO, + }, + {} + }; + int dirfd, ruleset_fd; + + ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer1_base); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer2_subs); + + ASSERT_LE(0, ruleset_fd); + switch (rel) { + case REL_OPEN: + case REL_CHDIR: + break; + case REL_CHROOT_ONLY: + ASSERT_EQ(0, chdir(dir_s2d2)); + break; + case REL_CHROOT_CHDIR: + ASSERT_EQ(0, chdir(dir_s1d2)); + break; + default: + ASSERT_TRUE(false); + return; + } + + set_cap(_metadata, CAP_SYS_CHROOT); + enforce_ruleset(_metadata, ruleset_fd); + + switch (rel) { + case REL_OPEN: + dirfd = open(dir_s1d2, O_DIRECTORY); + ASSERT_LE(0, dirfd); + break; + case REL_CHDIR: + ASSERT_EQ(0, chdir(dir_s1d2)); + dirfd = AT_FDCWD; + break; + case REL_CHROOT_ONLY: + /* Do chroot into dir_s1d2 (relative to dir_s2d2). */ + ASSERT_EQ(0, chroot("../../s1d1/s1d2")) { + TH_LOG("Failed to chroot: %s", strerror(errno)); + } + dirfd = AT_FDCWD; + break; + case REL_CHROOT_CHDIR: + /* Do chroot into dir_s1d2. */ + ASSERT_EQ(0, chroot(".")) { + TH_LOG("Failed to chroot: %s", strerror(errno)); + } + dirfd = AT_FDCWD; + break; + } + + ASSERT_EQ((rel == REL_CHROOT_CHDIR) ? 0 : EACCES, + test_open_rel(dirfd, "..", O_RDONLY)); + ASSERT_EQ(0, test_open_rel(dirfd, ".", O_RDONLY)); + + if (rel == REL_CHROOT_ONLY) { + /* The current directory is dir_s2d2. */ + ASSERT_EQ(0, test_open_rel(dirfd, "./s2d3", O_RDONLY)); + } else { + /* The current directory is dir_s1d2. */ + ASSERT_EQ(0, test_open_rel(dirfd, "./s1d3", O_RDONLY)); + } + + if (rel == REL_CHROOT_ONLY || rel == REL_CHROOT_CHDIR) { + /* Checks the root dir_s1d2. 
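+ * After the chroot, "/" resolves to dir_s1d2 and "/.." loops back to
+ * the new root itself, so both open(2) calls below are expected to
+ * succeed thanks to the layer2 rule on dir_s1d2.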
*/ + ASSERT_EQ(0, test_open_rel(dirfd, "/..", O_RDONLY)); + ASSERT_EQ(0, test_open_rel(dirfd, "/", O_RDONLY)); + ASSERT_EQ(0, test_open_rel(dirfd, "/f1", O_RDONLY)); + ASSERT_EQ(0, test_open_rel(dirfd, "/s1d3", O_RDONLY)); + } + + if (rel != REL_CHROOT_CHDIR) { + ASSERT_EQ(EACCES, test_open_rel(dirfd, "../../s1d1", O_RDONLY)); + ASSERT_EQ(0, test_open_rel(dirfd, "../../s1d1/s1d2", O_RDONLY)); + ASSERT_EQ(0, test_open_rel(dirfd, "../../s1d1/s1d2/s1d3", O_RDONLY)); + + ASSERT_EQ(EACCES, test_open_rel(dirfd, "../../s2d1", O_RDONLY)); + ASSERT_EQ(0, test_open_rel(dirfd, "../../s2d1/s2d2", O_RDONLY)); + ASSERT_EQ(0, test_open_rel(dirfd, "../../s2d1/s2d2/s2d3", O_RDONLY)); + } + + if (rel == REL_OPEN) + ASSERT_EQ(0, close(dirfd)); + ASSERT_EQ(0, close(ruleset_fd)); +} + +TEST_F_FORK(layout1, relative_open) +{ + test_relative_path(_metadata, REL_OPEN); +} + +TEST_F_FORK(layout1, relative_chdir) +{ + test_relative_path(_metadata, REL_CHDIR); +} + +TEST_F_FORK(layout1, relative_chroot_only) +{ + test_relative_path(_metadata, REL_CHROOT_ONLY); +} + +TEST_F_FORK(layout1, relative_chroot_chdir) +{ + test_relative_path(_metadata, REL_CHROOT_CHDIR); +} + +static void copy_binary(struct __test_metadata *const _metadata, + const char *const dst_path) +{ + int dst_fd, src_fd; + struct stat statbuf; + + dst_fd = open(dst_path, O_WRONLY | O_TRUNC | O_CLOEXEC); + ASSERT_LE(0, dst_fd) { + TH_LOG("Failed to open \"%s\": %s", dst_path, + strerror(errno)); + } + src_fd = open(BINARY_PATH, O_RDONLY | O_CLOEXEC); + ASSERT_LE(0, src_fd) { + TH_LOG("Failed to open \"" BINARY_PATH "\": %s", + strerror(errno)); + } + ASSERT_EQ(0, fstat(src_fd, &statbuf)); + ASSERT_EQ(statbuf.st_size, sendfile(dst_fd, src_fd, 0, + statbuf.st_size)); + ASSERT_EQ(0, close(src_fd)); + ASSERT_EQ(0, close(dst_fd)); +} + +static void test_execute(struct __test_metadata *const _metadata, + const int err, const char *const path) +{ + int status; + char *const argv[] = {(char *)path, NULL}; + const pid_t child = fork(); + + ASSERT_LE(0, child); + if (child == 0) { + ASSERT_EQ(err ? -1 : 0, execve(path, argv, NULL)) { + TH_LOG("Failed to execute \"%s\": %s", path, + strerror(errno)); + }; + ASSERT_EQ(err, errno); + _exit(_metadata->passed ? 2 : 1); + return; + } + ASSERT_EQ(child, waitpid(child, &status, 0)); + ASSERT_EQ(1, WIFEXITED(status)); + ASSERT_EQ(err ? 
2 : 0, WEXITSTATUS(status)) { + TH_LOG("Unexpected return code for \"%s\": %s", path, + strerror(errno)); + }; +} + +TEST_F_FORK(layout1, execute) +{ + const struct rule rules[] = { + { + .path = dir_s1d2, + .access = LANDLOCK_ACCESS_FS_EXECUTE, + }, + {} + }; + const int ruleset_fd = create_ruleset(_metadata, rules[0].access, + rules); + + ASSERT_LE(0, ruleset_fd); + copy_binary(_metadata, file1_s1d1); + copy_binary(_metadata, file1_s1d2); + copy_binary(_metadata, file1_s1d3); + + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY)); + ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY)); + test_execute(_metadata, EACCES, file1_s1d1); + + ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY)); + ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY)); + test_execute(_metadata, 0, file1_s1d2); + + ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY)); + ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY)); + test_execute(_metadata, 0, file1_s1d3); +} + +TEST_F_FORK(layout1, link) +{ + const struct rule rules[] = { + { + .path = dir_s1d2, + .access = LANDLOCK_ACCESS_FS_MAKE_REG, + }, + {} + }; + const int ruleset_fd = create_ruleset(_metadata, rules[0].access, + rules); + + ASSERT_LE(0, ruleset_fd); + + ASSERT_EQ(0, unlink(file1_s1d1)); + ASSERT_EQ(0, unlink(file1_s1d2)); + ASSERT_EQ(0, unlink(file1_s1d3)); + + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + ASSERT_EQ(-1, link(file2_s1d1, file1_s1d1)); + ASSERT_EQ(EACCES, errno); + /* Denies linking because of reparenting. */ + ASSERT_EQ(-1, link(file1_s2d1, file1_s1d2)); + ASSERT_EQ(EXDEV, errno); + ASSERT_EQ(-1, link(file2_s1d2, file1_s1d3)); + ASSERT_EQ(EXDEV, errno); + + ASSERT_EQ(0, link(file2_s1d2, file1_s1d2)); + ASSERT_EQ(0, link(file2_s1d3, file1_s1d3)); +} + +TEST_F_FORK(layout1, rename_file) +{ + const struct rule rules[] = { + { + .path = dir_s1d3, + .access = LANDLOCK_ACCESS_FS_REMOVE_FILE, + }, + { + .path = dir_s2d2, + .access = LANDLOCK_ACCESS_FS_REMOVE_FILE, + }, + {} + }; + const int ruleset_fd = create_ruleset(_metadata, rules[0].access, + rules); + + ASSERT_LE(0, ruleset_fd); + + ASSERT_EQ(0, unlink(file1_s1d1)); + ASSERT_EQ(0, unlink(file1_s1d2)); + + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* + * Tries to replace a file, from a directory that allows file removal, + * but to a different directory (which also allows file removal). + */ + ASSERT_EQ(-1, rename(file1_s2d3, file1_s1d3)); + ASSERT_EQ(EXDEV, errno); + ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d3, AT_FDCWD, file1_s1d3, + RENAME_EXCHANGE)); + ASSERT_EQ(EXDEV, errno); + ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d3, AT_FDCWD, dir_s1d3, + RENAME_EXCHANGE)); + ASSERT_EQ(EXDEV, errno); + + /* + * Tries to replace a file, from a directory that denies file removal, + * to a different directory (which allows file removal). + */ + ASSERT_EQ(-1, rename(file1_s2d1, file1_s1d3)); + ASSERT_EQ(EXDEV, errno); + ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d1, AT_FDCWD, file1_s1d3, + RENAME_EXCHANGE)); + ASSERT_EQ(EXDEV, errno); + ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d2, AT_FDCWD, file1_s1d3, + RENAME_EXCHANGE)); + ASSERT_EQ(EXDEV, errno); + + /* Exchanges files and directories that partially allow removal. */ + ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d2, AT_FDCWD, file1_s2d1, + RENAME_EXCHANGE)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d1, AT_FDCWD, dir_s2d2, + RENAME_EXCHANGE)); + ASSERT_EQ(EACCES, errno); + + /* Renames files with different parents. 
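+ * Landlock reports EXDEV, as for a cross-filesystem rename, because
+ * moving a file to a different parent directory (reparenting) is
+ * always denied for a sandboxed process.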
*/ + ASSERT_EQ(-1, rename(file1_s2d2, file1_s1d2)); + ASSERT_EQ(EXDEV, errno); + ASSERT_EQ(0, unlink(file1_s1d3)); + ASSERT_EQ(-1, rename(file1_s2d1, file1_s1d3)); + ASSERT_EQ(EXDEV, errno); + + /* Exchanges and renames files with same parent. */ + ASSERT_EQ(0, renameat2(AT_FDCWD, file2_s2d3, AT_FDCWD, file1_s2d3, + RENAME_EXCHANGE)); + ASSERT_EQ(0, rename(file2_s2d3, file1_s2d3)); + + /* Exchanges files and directories with same parent, twice. */ + ASSERT_EQ(0, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s2d3, + RENAME_EXCHANGE)); + ASSERT_EQ(0, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s2d3, + RENAME_EXCHANGE)); +} + +TEST_F_FORK(layout1, rename_dir) +{ + const struct rule rules[] = { + { + .path = dir_s1d2, + .access = LANDLOCK_ACCESS_FS_REMOVE_DIR, + }, + { + .path = dir_s2d1, + .access = LANDLOCK_ACCESS_FS_REMOVE_DIR, + }, + {} + }; + const int ruleset_fd = create_ruleset(_metadata, rules[0].access, + rules); + + ASSERT_LE(0, ruleset_fd); + + /* Empties dir_s1d3 to allow renaming. */ + ASSERT_EQ(0, unlink(file1_s1d3)); + ASSERT_EQ(0, unlink(file2_s1d3)); + + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Exchanges and renames directory to a different parent. */ + ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d3, AT_FDCWD, dir_s1d3, + RENAME_EXCHANGE)); + ASSERT_EQ(EXDEV, errno); + ASSERT_EQ(-1, rename(dir_s2d3, dir_s1d3)); + ASSERT_EQ(EXDEV, errno); + ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s1d3, + RENAME_EXCHANGE)); + ASSERT_EQ(EXDEV, errno); + + /* + * Exchanges directory to the same parent, which doesn't allow + * directory removal. + */ + ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s1d1, AT_FDCWD, dir_s2d1, + RENAME_EXCHANGE)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, dir_s1d2, + RENAME_EXCHANGE)); + ASSERT_EQ(EACCES, errno); + + /* + * Exchanges and renames directory to the same parent, which allows + * directory removal. + */ + ASSERT_EQ(0, renameat2(AT_FDCWD, dir_s1d3, AT_FDCWD, file1_s1d2, + RENAME_EXCHANGE)); + ASSERT_EQ(0, unlink(dir_s1d3)); + ASSERT_EQ(0, mkdir(dir_s1d3, 0700)); + ASSERT_EQ(0, rename(file1_s1d2, dir_s1d3)); + ASSERT_EQ(0, rmdir(dir_s1d3)); +} + +TEST_F_FORK(layout1, remove_dir) +{ + const struct rule rules[] = { + { + .path = dir_s1d2, + .access = LANDLOCK_ACCESS_FS_REMOVE_DIR, + }, + {} + }; + const int ruleset_fd = create_ruleset(_metadata, rules[0].access, + rules); + + ASSERT_LE(0, ruleset_fd); + + ASSERT_EQ(0, unlink(file1_s1d1)); + ASSERT_EQ(0, unlink(file1_s1d2)); + ASSERT_EQ(0, unlink(file1_s1d3)); + ASSERT_EQ(0, unlink(file2_s1d3)); + + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + ASSERT_EQ(0, rmdir(dir_s1d3)); + ASSERT_EQ(0, mkdir(dir_s1d3, 0700)); + ASSERT_EQ(0, unlinkat(AT_FDCWD, dir_s1d3, AT_REMOVEDIR)); + + /* dir_s1d2 itself cannot be removed. 
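+ * Removing an entry is checked against its parent directory: rmdir of
+ * dir_s1d2 requires LANDLOCK_ACCESS_FS_REMOVE_DIR on dir_s1d1, which
+ * no rule grants here.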
*/ + ASSERT_EQ(-1, rmdir(dir_s1d2)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(-1, unlinkat(AT_FDCWD, dir_s1d2, AT_REMOVEDIR)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(-1, rmdir(dir_s1d1)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(-1, unlinkat(AT_FDCWD, dir_s1d1, AT_REMOVEDIR)); + ASSERT_EQ(EACCES, errno); +} + +TEST_F_FORK(layout1, remove_file) +{ + const struct rule rules[] = { + { + .path = dir_s1d2, + .access = LANDLOCK_ACCESS_FS_REMOVE_FILE, + }, + {} + }; + const int ruleset_fd = create_ruleset(_metadata, rules[0].access, + rules); + + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + ASSERT_EQ(-1, unlink(file1_s1d1)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(-1, unlinkat(AT_FDCWD, file1_s1d1, 0)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(0, unlink(file1_s1d2)); + ASSERT_EQ(0, unlinkat(AT_FDCWD, file1_s1d3, 0)); +} + +static void test_make_file(struct __test_metadata *const _metadata, + const __u64 access, const mode_t mode, const dev_t dev) +{ + const struct rule rules[] = { + { + .path = dir_s1d2, + .access = access, + }, + {} + }; + const int ruleset_fd = create_ruleset(_metadata, access, rules); + + ASSERT_LE(0, ruleset_fd); + + ASSERT_EQ(0, unlink(file1_s1d1)); + ASSERT_EQ(0, unlink(file2_s1d1)); + ASSERT_EQ(0, mknod(file2_s1d1, mode | 0400, dev)) { + TH_LOG("Failed to make file \"%s\": %s", + file2_s1d1, strerror(errno)); + }; + + ASSERT_EQ(0, unlink(file1_s1d2)); + ASSERT_EQ(0, unlink(file2_s1d2)); + + ASSERT_EQ(0, unlink(file1_s1d3)); + ASSERT_EQ(0, unlink(file2_s1d3)); + + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + ASSERT_EQ(-1, mknod(file1_s1d1, mode | 0400, dev)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(-1, link(file2_s1d1, file1_s1d1)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(-1, rename(file2_s1d1, file1_s1d1)); + ASSERT_EQ(EACCES, errno); + + ASSERT_EQ(0, mknod(file1_s1d2, mode | 0400, dev)) { + TH_LOG("Failed to make file \"%s\": %s", + file1_s1d2, strerror(errno)); + }; + ASSERT_EQ(0, link(file1_s1d2, file2_s1d2)); + ASSERT_EQ(0, unlink(file2_s1d2)); + ASSERT_EQ(0, rename(file1_s1d2, file2_s1d2)); + + ASSERT_EQ(0, mknod(file1_s1d3, mode | 0400, dev)); + ASSERT_EQ(0, link(file1_s1d3, file2_s1d3)); + ASSERT_EQ(0, unlink(file2_s1d3)); + ASSERT_EQ(0, rename(file1_s1d3, file2_s1d3)); +} + +TEST_F_FORK(layout1, make_char) +{ + /* Creates a /dev/null device. */ + set_cap(_metadata, CAP_MKNOD); + test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_CHAR, S_IFCHR, + makedev(1, 3)); +} + +TEST_F_FORK(layout1, make_block) +{ + /* Creates a /dev/loop0 device. 
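+ * As for make_char above, mknod(2) of a device node also requires
+ * CAP_MKNOD, which is why the capability is raised for the whole
+ * test.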
*/ + set_cap(_metadata, CAP_MKNOD); + test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_BLOCK, S_IFBLK, + makedev(7, 0)); +} + +TEST_F_FORK(layout1, make_reg_1) +{ + test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_REG, S_IFREG, 0); +} + +TEST_F_FORK(layout1, make_reg_2) +{ + test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_REG, 0, 0); +} + +TEST_F_FORK(layout1, make_sock) +{ + test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_SOCK, S_IFSOCK, 0); +} + +TEST_F_FORK(layout1, make_fifo) +{ + test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_FIFO, S_IFIFO, 0); +} + +TEST_F_FORK(layout1, make_sym) +{ + const struct rule rules[] = { + { + .path = dir_s1d2, + .access = LANDLOCK_ACCESS_FS_MAKE_SYM, + }, + {} + }; + const int ruleset_fd = create_ruleset(_metadata, rules[0].access, + rules); + + ASSERT_LE(0, ruleset_fd); + + ASSERT_EQ(0, unlink(file1_s1d1)); + ASSERT_EQ(0, unlink(file2_s1d1)); + ASSERT_EQ(0, symlink("none", file2_s1d1)); + + ASSERT_EQ(0, unlink(file1_s1d2)); + ASSERT_EQ(0, unlink(file2_s1d2)); + + ASSERT_EQ(0, unlink(file1_s1d3)); + ASSERT_EQ(0, unlink(file2_s1d3)); + + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + ASSERT_EQ(-1, symlink("none", file1_s1d1)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(-1, link(file2_s1d1, file1_s1d1)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(-1, rename(file2_s1d1, file1_s1d1)); + ASSERT_EQ(EACCES, errno); + + ASSERT_EQ(0, symlink("none", file1_s1d2)); + ASSERT_EQ(0, link(file1_s1d2, file2_s1d2)); + ASSERT_EQ(0, unlink(file2_s1d2)); + ASSERT_EQ(0, rename(file1_s1d2, file2_s1d2)); + + ASSERT_EQ(0, symlink("none", file1_s1d3)); + ASSERT_EQ(0, link(file1_s1d3, file2_s1d3)); + ASSERT_EQ(0, unlink(file2_s1d3)); + ASSERT_EQ(0, rename(file1_s1d3, file2_s1d3)); +} + +TEST_F_FORK(layout1, make_dir) +{ + const struct rule rules[] = { + { + .path = dir_s1d2, + .access = LANDLOCK_ACCESS_FS_MAKE_DIR, + }, + {} + }; + const int ruleset_fd = create_ruleset(_metadata, rules[0].access, + rules); + + ASSERT_LE(0, ruleset_fd); + + ASSERT_EQ(0, unlink(file1_s1d1)); + ASSERT_EQ(0, unlink(file1_s1d2)); + ASSERT_EQ(0, unlink(file1_s1d3)); + + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Uses file_* as directory names. 
*/ + ASSERT_EQ(-1, mkdir(file1_s1d1, 0700)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(0, mkdir(file1_s1d2, 0700)); + ASSERT_EQ(0, mkdir(file1_s1d3, 0700)); +} + +static int open_proc_fd(struct __test_metadata *const _metadata, const int fd, + const int open_flags) +{ + static const char path_template[] = "/proc/self/fd/%d"; + char procfd_path[sizeof(path_template) + 10]; + const int procfd_path_size = snprintf(procfd_path, sizeof(procfd_path), + path_template, fd); + + ASSERT_LT(procfd_path_size, sizeof(procfd_path)); + return open(procfd_path, open_flags); +} + +TEST_F_FORK(layout1, proc_unlinked_file) +{ + const struct rule rules[] = { + { + .path = file1_s1d2, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + {} + }; + int reg_fd, proc_fd; + const int ruleset_fd = create_ruleset(_metadata, + LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE, rules); + + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDWR)); + ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY)); + reg_fd = open(file1_s1d2, O_RDONLY | O_CLOEXEC); + ASSERT_LE(0, reg_fd); + ASSERT_EQ(0, unlink(file1_s1d2)); + + proc_fd = open_proc_fd(_metadata, reg_fd, O_RDONLY | O_CLOEXEC); + ASSERT_LE(0, proc_fd); + ASSERT_EQ(0, close(proc_fd)); + + proc_fd = open_proc_fd(_metadata, reg_fd, O_RDWR | O_CLOEXEC); + ASSERT_EQ(-1, proc_fd) { + TH_LOG("Successfully opened /proc/self/fd/%d: %s", + reg_fd, strerror(errno)); + } + ASSERT_EQ(EACCES, errno); + + ASSERT_EQ(0, close(reg_fd)); +} + +TEST_F_FORK(layout1, proc_pipe) +{ + int proc_fd; + int pipe_fds[2]; + char buf = '\0'; + const struct rule rules[] = { + { + .path = dir_s1d2, + .access = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE, + }, + {} + }; + /* Limits read and write access to files tied to the filesystem. */ + const int ruleset_fd = create_ruleset(_metadata, rules[0].access, + rules); + + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks enforcement for normal files. */ + ASSERT_EQ(0, test_open(file1_s1d2, O_RDWR)); + ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR)); + + /* Checks access to pipes through FD. */ + ASSERT_EQ(0, pipe2(pipe_fds, O_CLOEXEC)); + ASSERT_EQ(1, write(pipe_fds[1], ".", 1)) { + TH_LOG("Failed to write in pipe: %s", strerror(errno)); + } + ASSERT_EQ(1, read(pipe_fds[0], &buf, 1)); + ASSERT_EQ('.', buf); + + /* Checks write access to pipe through /proc/self/fd . */ + proc_fd = open_proc_fd(_metadata, pipe_fds[1], O_WRONLY | O_CLOEXEC); + ASSERT_LE(0, proc_fd); + ASSERT_EQ(1, write(proc_fd, ".", 1)) { + TH_LOG("Failed to write through /proc/self/fd/%d: %s", + pipe_fds[1], strerror(errno)); + } + ASSERT_EQ(0, close(proc_fd)); + + /* Checks read access to pipe through /proc/self/fd . 
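+ * Pipes are not tied to any filesystem hierarchy, so the enforced
+ * ruleset cannot restrict them, even when their FDs are reopened via
+ * procfs.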
+ */
+ proc_fd = open_proc_fd(_metadata, pipe_fds[0], O_RDONLY | O_CLOEXEC);
+ ASSERT_LE(0, proc_fd);
+ buf = '\0';
+ ASSERT_EQ(1, read(proc_fd, &buf, 1)) {
+ TH_LOG("Failed to read through /proc/self/fd/%d: %s",
+ pipe_fds[0], strerror(errno));
+ }
+ ASSERT_EQ(0, close(proc_fd));
+
+ ASSERT_EQ(0, close(pipe_fds[0]));
+ ASSERT_EQ(0, close(pipe_fds[1]));
+}
+
+FIXTURE(layout1_bind) {
+};
+
+FIXTURE_SETUP(layout1_bind)
+{
+ prepare_layout(_metadata);
+
+ create_layout1(_metadata);
+
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, mount(dir_s1d2, dir_s2d2, NULL, MS_BIND, NULL));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+}
+
+FIXTURE_TEARDOWN(layout1_bind)
+{
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ EXPECT_EQ(0, umount(dir_s2d2));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+
+ remove_layout1(_metadata);
+
+ cleanup_layout(_metadata);
+}
+
+static const char bind_dir_s1d3[] = TMP_DIR "/s2d1/s2d2/s1d3";
+static const char bind_file1_s1d3[] = TMP_DIR "/s2d1/s2d2/s1d3/f1";
+
+/*
+ * layout1_bind hierarchy:
+ *
+ * tmp
+ * ├── s1d1
+ * │   ├── f1
+ * │   ├── f2
+ * │   └── s1d2
+ * │       ├── f1
+ * │       ├── f2
+ * │       └── s1d3
+ * │           ├── f1
+ * │           └── f2
+ * ├── s2d1
+ * │   ├── f1
+ * │   └── s2d2
+ * │       ├── f1
+ * │       ├── f2
+ * │       └── s1d3
+ * │           ├── f1
+ * │           └── f2
+ * └── s3d1
+ *     └── s3d2
+ *         └── s3d3
+ */
+
+TEST_F_FORK(layout1_bind, no_restriction)
+{
+ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+
+ ASSERT_EQ(0, test_open(dir_s2d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s2d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s2d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s2d2, O_RDONLY));
+ ASSERT_EQ(ENOENT, test_open(dir_s2d3, O_RDONLY));
+ ASSERT_EQ(ENOENT, test_open(file1_s2d3, O_RDONLY));
+
+ ASSERT_EQ(0, test_open(bind_dir_s1d3, O_RDONLY));
+ ASSERT_EQ(0, test_open(bind_file1_s1d3, O_RDONLY));
+
+ ASSERT_EQ(0, test_open(dir_s3d1, O_RDONLY));
+}
+
+TEST_F_FORK(layout1_bind, same_content_same_file)
+{
+ /*
+ * Sets access rights on the parent directories of both the source and
+ * destination mount points.
+ */
+ const struct rule layer1_parent[] = {
+ {
+ .path = dir_s1d1,
+ .access = ACCESS_RO,
+ },
+ {
+ .path = dir_s2d1,
+ .access = ACCESS_RW,
+ },
+ {}
+ };
+ /*
+ * Sets access rights on the same bind-mounted directories. The result
+ * should be ACCESS_RW for both directories, but not for both
+ * hierarchies, because of the first layer.
+ */
+ const struct rule layer2_mount_point[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {
+ .path = dir_s2d2,
+ .access = ACCESS_RW,
+ },
+ {}
+ };
+ /* Only allows read access to the s1d3 hierarchies. */
+ const struct rule layer3_source[] = {
+ {
+ .path = dir_s1d3,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {}
+ };
+ /* Removes all access rights. */
+ const struct rule layer4_destination[] = {
+ {
+ .path = bind_file1_s1d3,
+ .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {}
+ };
+ int ruleset_fd;
+
+ /* Sets rules for the parent directories. */
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer1_parent);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks source hierarchy.
*/ + ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY)); + ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY)); + + ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY)); + ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY)); + + /* Checks destination hierarchy. */ + ASSERT_EQ(0, test_open(file1_s2d1, O_RDWR)); + ASSERT_EQ(0, test_open(dir_s2d1, O_RDONLY | O_DIRECTORY)); + + ASSERT_EQ(0, test_open(file1_s2d2, O_RDWR)); + ASSERT_EQ(0, test_open(dir_s2d2, O_RDONLY | O_DIRECTORY)); + + /* Sets rules for the mount points. */ + ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer2_mount_point); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks source hierarchy. */ + ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY)); + + ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY)); + ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY)); + + /* Checks destination hierarchy. */ + ASSERT_EQ(EACCES, test_open(file1_s2d1, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(file1_s2d1, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(dir_s2d1, O_RDONLY | O_DIRECTORY)); + + ASSERT_EQ(0, test_open(file1_s2d2, O_RDWR)); + ASSERT_EQ(0, test_open(dir_s2d2, O_RDONLY | O_DIRECTORY)); + ASSERT_EQ(0, test_open(bind_dir_s1d3, O_RDONLY | O_DIRECTORY)); + + /* Sets a (shared) rule only on the source. */ + ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer3_source); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks source hierarchy. */ + ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY)); + + ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY)); + + /* Checks destination hierarchy. */ + ASSERT_EQ(EACCES, test_open(file1_s2d2, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(file1_s2d2, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(dir_s2d2, O_RDONLY | O_DIRECTORY)); + + ASSERT_EQ(0, test_open(bind_file1_s1d3, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(bind_file1_s1d3, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(bind_dir_s1d3, O_RDONLY | O_DIRECTORY)); + + /* Sets a (shared) rule only on the destination. */ + ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer4_destination); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks source hierarchy. */ + ASSERT_EQ(EACCES, test_open(file1_s1d3, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY)); + + /* Checks destination hierarchy. 
*/ + ASSERT_EQ(EACCES, test_open(bind_file1_s1d3, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(bind_file1_s1d3, O_WRONLY)); +} + +#define LOWER_BASE TMP_DIR "/lower" +#define LOWER_DATA LOWER_BASE "/data" +static const char lower_fl1[] = LOWER_DATA "/fl1"; +static const char lower_dl1[] = LOWER_DATA "/dl1"; +static const char lower_dl1_fl2[] = LOWER_DATA "/dl1/fl2"; +static const char lower_fo1[] = LOWER_DATA "/fo1"; +static const char lower_do1[] = LOWER_DATA "/do1"; +static const char lower_do1_fo2[] = LOWER_DATA "/do1/fo2"; +static const char lower_do1_fl3[] = LOWER_DATA "/do1/fl3"; + +static const char (*lower_base_files[])[] = { + &lower_fl1, + &lower_fo1, + NULL +}; +static const char (*lower_base_directories[])[] = { + &lower_dl1, + &lower_do1, + NULL +}; +static const char (*lower_sub_files[])[] = { + &lower_dl1_fl2, + &lower_do1_fo2, + &lower_do1_fl3, + NULL +}; + +#define UPPER_BASE TMP_DIR "/upper" +#define UPPER_DATA UPPER_BASE "/data" +#define UPPER_WORK UPPER_BASE "/work" +static const char upper_fu1[] = UPPER_DATA "/fu1"; +static const char upper_du1[] = UPPER_DATA "/du1"; +static const char upper_du1_fu2[] = UPPER_DATA "/du1/fu2"; +static const char upper_fo1[] = UPPER_DATA "/fo1"; +static const char upper_do1[] = UPPER_DATA "/do1"; +static const char upper_do1_fo2[] = UPPER_DATA "/do1/fo2"; +static const char upper_do1_fu3[] = UPPER_DATA "/do1/fu3"; + +static const char (*upper_base_files[])[] = { + &upper_fu1, + &upper_fo1, + NULL +}; +static const char (*upper_base_directories[])[] = { + &upper_du1, + &upper_do1, + NULL +}; +static const char (*upper_sub_files[])[] = { + &upper_du1_fu2, + &upper_do1_fo2, + &upper_do1_fu3, + NULL +}; + +#define MERGE_BASE TMP_DIR "/merge" +#define MERGE_DATA MERGE_BASE "/data" +static const char merge_fl1[] = MERGE_DATA "/fl1"; +static const char merge_dl1[] = MERGE_DATA "/dl1"; +static const char merge_dl1_fl2[] = MERGE_DATA "/dl1/fl2"; +static const char merge_fu1[] = MERGE_DATA "/fu1"; +static const char merge_du1[] = MERGE_DATA "/du1"; +static const char merge_du1_fu2[] = MERGE_DATA "/du1/fu2"; +static const char merge_fo1[] = MERGE_DATA "/fo1"; +static const char merge_do1[] = MERGE_DATA "/do1"; +static const char merge_do1_fo2[] = MERGE_DATA "/do1/fo2"; +static const char merge_do1_fl3[] = MERGE_DATA "/do1/fl3"; +static const char merge_do1_fu3[] = MERGE_DATA "/do1/fu3"; + +static const char (*merge_base_files[])[] = { + &merge_fl1, + &merge_fu1, + &merge_fo1, + NULL +}; +static const char (*merge_base_directories[])[] = { + &merge_dl1, + &merge_du1, + &merge_do1, + NULL +}; +static const char (*merge_sub_files[])[] = { + &merge_dl1_fl2, + &merge_du1_fu2, + &merge_do1_fo2, + &merge_do1_fl3, + &merge_do1_fu3, + NULL +}; + +/* + * layout2_overlay hierarchy: + * + * tmp + * ├── lower + * │  └── data + * │  ├── dl1 + * │  │  └── fl2 + * │  ├── do1 + * │  │  ├── fl3 + * │  │  └── fo2 + * │  ├── fl1 + * │  └── fo1 + * ├── merge + * │  └── data + * │  ├── dl1 + * │  │  └── fl2 + * │  ├── do1 + * │  │  ├── fl3 + * │  │  ├── fo2 + * │  │  └── fu3 + * │  ├── du1 + * │  │  └── fu2 + * │  ├── fl1 + * │  ├── fo1 + * │  └── fu1 + * └── upper + * ├── data + * │  ├── do1 + * │  │  ├── fo2 + * │  │  └── fu3 + * │  ├── du1 + * │  │  └── fu2 + * │  ├── fo1 + * │  └── fu1 + * └── work + * └── work + */ + +FIXTURE(layout2_overlay) { +}; + +FIXTURE_SETUP(layout2_overlay) +{ + prepare_layout(_metadata); + + create_directory(_metadata, LOWER_BASE); + set_cap(_metadata, CAP_SYS_ADMIN); + /* Creates tmpfs mount points to get deterministic overlayfs. 
*/ + ASSERT_EQ(0, mount("tmp", LOWER_BASE, "tmpfs", 0, "size=4m,mode=700")); + clear_cap(_metadata, CAP_SYS_ADMIN); + create_file(_metadata, lower_fl1); + create_file(_metadata, lower_dl1_fl2); + create_file(_metadata, lower_fo1); + create_file(_metadata, lower_do1_fo2); + create_file(_metadata, lower_do1_fl3); + + create_directory(_metadata, UPPER_BASE); + set_cap(_metadata, CAP_SYS_ADMIN); + ASSERT_EQ(0, mount("tmp", UPPER_BASE, "tmpfs", 0, "size=4m,mode=700")); + clear_cap(_metadata, CAP_SYS_ADMIN); + create_file(_metadata, upper_fu1); + create_file(_metadata, upper_du1_fu2); + create_file(_metadata, upper_fo1); + create_file(_metadata, upper_do1_fo2); + create_file(_metadata, upper_do1_fu3); + ASSERT_EQ(0, mkdir(UPPER_WORK, 0700)); + + create_directory(_metadata, MERGE_DATA); + set_cap(_metadata, CAP_SYS_ADMIN); + set_cap(_metadata, CAP_DAC_OVERRIDE); + ASSERT_EQ(0, mount("overlay", MERGE_DATA, "overlay", 0, + "lowerdir=" LOWER_DATA + ",upperdir=" UPPER_DATA + ",workdir=" UPPER_WORK)); + clear_cap(_metadata, CAP_DAC_OVERRIDE); + clear_cap(_metadata, CAP_SYS_ADMIN); +} + +FIXTURE_TEARDOWN(layout2_overlay) +{ + EXPECT_EQ(0, remove_path(lower_do1_fl3)); + EXPECT_EQ(0, remove_path(lower_dl1_fl2)); + EXPECT_EQ(0, remove_path(lower_fl1)); + EXPECT_EQ(0, remove_path(lower_do1_fo2)); + EXPECT_EQ(0, remove_path(lower_fo1)); + set_cap(_metadata, CAP_SYS_ADMIN); + EXPECT_EQ(0, umount(LOWER_BASE)); + clear_cap(_metadata, CAP_SYS_ADMIN); + EXPECT_EQ(0, remove_path(LOWER_BASE)); + + EXPECT_EQ(0, remove_path(upper_do1_fu3)); + EXPECT_EQ(0, remove_path(upper_du1_fu2)); + EXPECT_EQ(0, remove_path(upper_fu1)); + EXPECT_EQ(0, remove_path(upper_do1_fo2)); + EXPECT_EQ(0, remove_path(upper_fo1)); + EXPECT_EQ(0, remove_path(UPPER_WORK "/work")); + set_cap(_metadata, CAP_SYS_ADMIN); + EXPECT_EQ(0, umount(UPPER_BASE)); + clear_cap(_metadata, CAP_SYS_ADMIN); + EXPECT_EQ(0, remove_path(UPPER_BASE)); + + set_cap(_metadata, CAP_SYS_ADMIN); + EXPECT_EQ(0, umount(MERGE_DATA)); + clear_cap(_metadata, CAP_SYS_ADMIN); + EXPECT_EQ(0, remove_path(MERGE_DATA)); + + cleanup_layout(_metadata); +} + +TEST_F_FORK(layout2_overlay, no_restriction) +{ + ASSERT_EQ(0, test_open(lower_fl1, O_RDONLY)); + ASSERT_EQ(0, test_open(lower_dl1, O_RDONLY)); + ASSERT_EQ(0, test_open(lower_dl1_fl2, O_RDONLY)); + ASSERT_EQ(0, test_open(lower_fo1, O_RDONLY)); + ASSERT_EQ(0, test_open(lower_do1, O_RDONLY)); + ASSERT_EQ(0, test_open(lower_do1_fo2, O_RDONLY)); + ASSERT_EQ(0, test_open(lower_do1_fl3, O_RDONLY)); + + ASSERT_EQ(0, test_open(upper_fu1, O_RDONLY)); + ASSERT_EQ(0, test_open(upper_du1, O_RDONLY)); + ASSERT_EQ(0, test_open(upper_du1_fu2, O_RDONLY)); + ASSERT_EQ(0, test_open(upper_fo1, O_RDONLY)); + ASSERT_EQ(0, test_open(upper_do1, O_RDONLY)); + ASSERT_EQ(0, test_open(upper_do1_fo2, O_RDONLY)); + ASSERT_EQ(0, test_open(upper_do1_fu3, O_RDONLY)); + + ASSERT_EQ(0, test_open(merge_fl1, O_RDONLY)); + ASSERT_EQ(0, test_open(merge_dl1, O_RDONLY)); + ASSERT_EQ(0, test_open(merge_dl1_fl2, O_RDONLY)); + ASSERT_EQ(0, test_open(merge_fu1, O_RDONLY)); + ASSERT_EQ(0, test_open(merge_du1, O_RDONLY)); + ASSERT_EQ(0, test_open(merge_du1_fu2, O_RDONLY)); + ASSERT_EQ(0, test_open(merge_fo1, O_RDONLY)); + ASSERT_EQ(0, test_open(merge_do1, O_RDONLY)); + ASSERT_EQ(0, test_open(merge_do1_fo2, O_RDONLY)); + ASSERT_EQ(0, test_open(merge_do1_fl3, O_RDONLY)); + ASSERT_EQ(0, test_open(merge_do1_fu3, O_RDONLY)); +} + +#define for_each_path(path_list, path_entry, i) \ + for (i = 0, path_entry = *path_list[i]; path_list[i]; \ + path_entry = *path_list[++i]) + 
+TEST_F_FORK(layout2_overlay, same_content_different_file) +{ + /* Sets access rights on parent directories of both layers. */ + const struct rule layer1_base[] = { + { + .path = LOWER_BASE, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + { + .path = UPPER_BASE, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + { + .path = MERGE_BASE, + .access = ACCESS_RW, + }, + {} + }; + const struct rule layer2_data[] = { + { + .path = LOWER_DATA, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + { + .path = UPPER_DATA, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + { + .path = MERGE_DATA, + .access = ACCESS_RW, + }, + {} + }; + /* Sets access rights on directories inside both layers. */ + const struct rule layer3_subdirs[] = { + { + .path = lower_dl1, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + { + .path = lower_do1, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + { + .path = upper_du1, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + { + .path = upper_do1, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + { + .path = merge_dl1, + .access = ACCESS_RW, + }, + { + .path = merge_du1, + .access = ACCESS_RW, + }, + { + .path = merge_do1, + .access = ACCESS_RW, + }, + {} + }; + /* Tighten access rights to the files. */ + const struct rule layer4_files[] = { + { + .path = lower_dl1_fl2, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + { + .path = lower_do1_fo2, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + { + .path = lower_do1_fl3, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + { + .path = upper_du1_fu2, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + { + .path = upper_do1_fo2, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + { + .path = upper_do1_fu3, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + { + .path = merge_dl1_fl2, + .access = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE, + }, + { + .path = merge_du1_fu2, + .access = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE, + }, + { + .path = merge_do1_fo2, + .access = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE, + }, + { + .path = merge_do1_fl3, + .access = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE, + }, + { + .path = merge_do1_fu3, + .access = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE, + }, + {} + }; + const struct rule layer5_merge_only[] = { + { + .path = MERGE_DATA, + .access = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE, + }, + {} + }; + int ruleset_fd; + size_t i; + const char *path_entry; + + /* Sets rules on base directories (i.e. outside overlay scope). */ + ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer1_base); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks lower layer. */ + for_each_path(lower_base_files, path_entry, i) { + ASSERT_EQ(0, test_open(path_entry, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY)); + } + for_each_path(lower_base_directories, path_entry, i) { + ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY)); + } + for_each_path(lower_sub_files, path_entry, i) { + ASSERT_EQ(0, test_open(path_entry, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY)); + } + /* Checks upper layer. 
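+ * (mirrors the lower-layer checks: layer1 only granted
+ * LANDLOCK_ACCESS_FS_READ_FILE on UPPER_BASE, so files are readable
+ * but not writable, and directories are not listable.)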
 */ + for_each_path(upper_base_files, path_entry, i) { + ASSERT_EQ(0, test_open(path_entry, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY)); + } + for_each_path(upper_base_directories, path_entry, i) { + ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY)); + } + for_each_path(upper_sub_files, path_entry, i) { + ASSERT_EQ(0, test_open(path_entry, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY)); + } + /* + * Checks that access rights are independent of the lower and upper + * layers: write access to upper files viewed through the merge point + * is still allowed, and write access to lower files viewed (and copied) + * through the merge point is still allowed. + */ + for_each_path(merge_base_files, path_entry, i) { + ASSERT_EQ(0, test_open(path_entry, O_RDWR)); + } + for_each_path(merge_base_directories, path_entry, i) { + ASSERT_EQ(0, test_open(path_entry, O_RDONLY | O_DIRECTORY)); + } + for_each_path(merge_sub_files, path_entry, i) { + ASSERT_EQ(0, test_open(path_entry, O_RDWR)); + } + + /* Sets rules on data directories (i.e. inside overlay scope). */ + ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer2_data); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks merge. */ + for_each_path(merge_base_files, path_entry, i) { + ASSERT_EQ(0, test_open(path_entry, O_RDWR)); + } + for_each_path(merge_base_directories, path_entry, i) { + ASSERT_EQ(0, test_open(path_entry, O_RDONLY | O_DIRECTORY)); + } + for_each_path(merge_sub_files, path_entry, i) { + ASSERT_EQ(0, test_open(path_entry, O_RDWR)); + } + + /* Same checks with tighter rules. */ + ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer3_subdirs); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks changes for lower layer. */ + for_each_path(lower_base_files, path_entry, i) { + ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY)); + } + /* Checks changes for upper layer. */ + for_each_path(upper_base_files, path_entry, i) { + ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY)); + } + /* Checks all merge accesses. */ + for_each_path(merge_base_files, path_entry, i) { + ASSERT_EQ(EACCES, test_open(path_entry, O_RDWR)); + } + for_each_path(merge_base_directories, path_entry, i) { + ASSERT_EQ(0, test_open(path_entry, O_RDONLY | O_DIRECTORY)); + } + for_each_path(merge_sub_files, path_entry, i) { + ASSERT_EQ(0, test_open(path_entry, O_RDWR)); + } + + /* Sets rules directly on overlayed files. */ + ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer4_files); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks unchanged accesses on lower layer. */ + for_each_path(lower_sub_files, path_entry, i) { + ASSERT_EQ(0, test_open(path_entry, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY)); + } + /* Checks unchanged accesses on upper layer. */ + for_each_path(upper_sub_files, path_entry, i) { + ASSERT_EQ(0, test_open(path_entry, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY)); + } + /* Checks all merge accesses. 
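+ * (layer4 named only the sub files: the merge base files and base
+ * directories now fail, while the explicitly listed merge_sub_files
+ * keep their read-write access.)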
 */ + for_each_path(merge_base_files, path_entry, i) { + ASSERT_EQ(EACCES, test_open(path_entry, O_RDWR)); + } + for_each_path(merge_base_directories, path_entry, i) { + ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY)); + } + for_each_path(merge_sub_files, path_entry, i) { + ASSERT_EQ(0, test_open(path_entry, O_RDWR)); + } + + /* Only allows access to the merge hierarchy. */ + ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer5_merge_only); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks new accesses on lower layer. */ + for_each_path(lower_sub_files, path_entry, i) { + ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY)); + } + /* Checks new accesses on upper layer. */ + for_each_path(upper_sub_files, path_entry, i) { + ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY)); + } + /* Checks all merge accesses. */ + for_each_path(merge_base_files, path_entry, i) { + ASSERT_EQ(EACCES, test_open(path_entry, O_RDWR)); + } + for_each_path(merge_base_directories, path_entry, i) { + ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY)); + } + for_each_path(merge_sub_files, path_entry, i) { + ASSERT_EQ(0, test_open(path_entry, O_RDWR)); + } +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/landlock/ptrace_test.c b/tools/testing/selftests/landlock/ptrace_test.c new file mode 100644 index 000000000000..15fbef9cc849 --- /dev/null +++ b/tools/testing/selftests/landlock/ptrace_test.c @@ -0,0 +1,337 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Landlock tests - Ptrace + * + * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net> + * Copyright © 2019-2020 ANSSI + */ + +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <linux/landlock.h> +#include <signal.h> +#include <sys/prctl.h> +#include <sys/ptrace.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "common.h" + +static void create_domain(struct __test_metadata *const _metadata) +{ + int ruleset_fd; + struct landlock_ruleset_attr ruleset_attr = { + .handled_access_fs = LANDLOCK_ACCESS_FS_MAKE_BLOCK, + }; + + ruleset_fd = landlock_create_ruleset(&ruleset_attr, + sizeof(ruleset_attr), 0); + EXPECT_LE(0, ruleset_fd) { + TH_LOG("Failed to create a ruleset: %s", strerror(errno)); + } + EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); + EXPECT_EQ(0, landlock_restrict_self(ruleset_fd, 0)); + EXPECT_EQ(0, close(ruleset_fd)); +} + +static int test_ptrace_read(const pid_t pid) +{ + static const char path_template[] = "/proc/%d/environ"; + char procenv_path[sizeof(path_template) + 10]; + int procenv_path_size, fd; + + procenv_path_size = snprintf(procenv_path, sizeof(procenv_path), + path_template, pid); + if (procenv_path_size >= sizeof(procenv_path)) + return E2BIG; + + fd = open(procenv_path, O_RDONLY | O_CLOEXEC); + if (fd < 0) + return errno; + /* + * Mixing error codes from close(2) and open(2) should not lead to any + * (access type) confusion for this test. + */ + if (close(fd) != 0) + return errno; + return 0; +} + +FIXTURE(hierarchy) { }; + +FIXTURE_VARIANT(hierarchy) { + const bool domain_both; + const bool domain_parent; + const bool domain_child; +}; + +/* + * Test multiple tracing combinations between a parent process P1 and a child + * process P2. + * + * Yama's scoped ptrace is presumed disabled. If enabled, this optional + * restriction is enforced in addition to any Landlock check, which means that + * all P2 requests to trace P1 would be denied. 
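+ *
+ * For example, on kernels built with CONFIG_SECURITY_YAMA, Yama's
+ * restriction can be lifted for such tests with:
+ *   sysctl -w kernel.yama.ptrace_scope=0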
+ */ + +/* + * No domain + * + * P1-. P1 -> P2 : allow + * \ P2 -> P1 : allow + * 'P2 + */ +FIXTURE_VARIANT_ADD(hierarchy, allow_without_domain) { + .domain_both = false, + .domain_parent = false, + .domain_child = false, +}; + +/* + * Child domain + * + * P1--. P1 -> P2 : allow + * \ P2 -> P1 : deny + * .'-----. + * | P2 | + * '------' + */ +FIXTURE_VARIANT_ADD(hierarchy, allow_with_one_domain) { + .domain_both = false, + .domain_parent = false, + .domain_child = true, +}; + +/* + * Parent domain + * .------. + * | P1 --. P1 -> P2 : deny + * '------' \ P2 -> P1 : allow + * ' + * P2 + */ +FIXTURE_VARIANT_ADD(hierarchy, deny_with_parent_domain) { + .domain_both = false, + .domain_parent = true, + .domain_child = false, +}; + +/* + * Parent + child domain (siblings) + * .------. + * | P1 ---. P1 -> P2 : deny + * '------' \ P2 -> P1 : deny + * .---'--. + * | P2 | + * '------' + */ +FIXTURE_VARIANT_ADD(hierarchy, deny_with_sibling_domain) { + .domain_both = false, + .domain_parent = true, + .domain_child = true, +}; + +/* + * Same domain (inherited) + * .-------------. + * | P1----. | P1 -> P2 : allow + * | \ | P2 -> P1 : allow + * | ' | + * | P2 | + * '-------------' + */ +FIXTURE_VARIANT_ADD(hierarchy, allow_sibling_domain) { + .domain_both = true, + .domain_parent = false, + .domain_child = false, +}; + +/* + * Inherited + child domain + * .-----------------. + * | P1----. | P1 -> P2 : allow + * | \ | P2 -> P1 : deny + * | .-'----. | + * | | P2 | | + * | '------' | + * '-----------------' + */ +FIXTURE_VARIANT_ADD(hierarchy, allow_with_nested_domain) { + .domain_both = true, + .domain_parent = false, + .domain_child = true, +}; + +/* + * Inherited + parent domain + * .-----------------. + * |.------. | P1 -> P2 : deny + * || P1 ----. | P2 -> P1 : allow + * |'------' \ | + * | ' | + * | P2 | + * '-----------------' + */ +FIXTURE_VARIANT_ADD(hierarchy, deny_with_nested_and_parent_domain) { + .domain_both = true, + .domain_parent = true, + .domain_child = false, +}; + +/* + * Inherited + parent and child domain (siblings) + * .-----------------. + * | .------. | P1 -> P2 : deny + * | | P1 . | P2 -> P1 : deny + * | '------'\ | + * | \ | + * | .--'---. | + * | | P2 | | + * | '------' | + * '-----------------' + */ +FIXTURE_VARIANT_ADD(hierarchy, deny_with_forked_domain) { + .domain_both = true, + .domain_parent = true, + .domain_child = true, +}; + +FIXTURE_SETUP(hierarchy) +{ } + +FIXTURE_TEARDOWN(hierarchy) +{ } + +/* Test PTRACE_TRACEME and PTRACE_ATTACH for parent and child. */ +TEST_F(hierarchy, trace) +{ + pid_t child, parent; + int status, err_proc_read; + int pipe_child[2], pipe_parent[2]; + char buf_parent; + long ret; + + /* + * Removes all effective and permitted capabilities to not interfere + * with cap_ptrace_access_check() in case of PTRACE_MODE_FSCREDS. + */ + drop_caps(_metadata); + + parent = getpid(); + ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC)); + ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC)); + if (variant->domain_both) { + create_domain(_metadata); + if (!_metadata->passed) + /* Aborts before forking. */ + return; + } + + child = fork(); + ASSERT_LE(0, child); + if (child == 0) { + char buf_child; + + ASSERT_EQ(0, close(pipe_parent[1])); + ASSERT_EQ(0, close(pipe_child[0])); + if (variant->domain_child) + create_domain(_metadata); + + /* Waits for the parent to be in a domain, if any. */ + ASSERT_EQ(1, read(pipe_parent[0], &buf_child, 1)); + + /* Tests PTRACE_ATTACH and PTRACE_MODE_READ on the parent. 
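+ * PTRACE_MODE_READ is exercised indirectly: test_ptrace_read()
+ * opens /proc/<parent>/environ, an access the kernel gates with a
+ * ptrace access-mode (PTRACE_MODE_READ) check.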
*/ + err_proc_read = test_ptrace_read(parent); + ret = ptrace(PTRACE_ATTACH, parent, NULL, 0); + if (variant->domain_child) { + EXPECT_EQ(-1, ret); + EXPECT_EQ(EPERM, errno); + EXPECT_EQ(EACCES, err_proc_read); + } else { + EXPECT_EQ(0, ret); + EXPECT_EQ(0, err_proc_read); + } + if (ret == 0) { + ASSERT_EQ(parent, waitpid(parent, &status, 0)); + ASSERT_EQ(1, WIFSTOPPED(status)); + ASSERT_EQ(0, ptrace(PTRACE_DETACH, parent, NULL, 0)); + } + + /* Tests child PTRACE_TRACEME. */ + ret = ptrace(PTRACE_TRACEME); + if (variant->domain_parent) { + EXPECT_EQ(-1, ret); + EXPECT_EQ(EPERM, errno); + } else { + EXPECT_EQ(0, ret); + } + + /* + * Signals that the PTRACE_ATTACH test is done and the + * PTRACE_TRACEME test is ongoing. + */ + ASSERT_EQ(1, write(pipe_child[1], ".", 1)); + + if (!variant->domain_parent) { + ASSERT_EQ(0, raise(SIGSTOP)); + } + + /* Waits for the parent PTRACE_ATTACH test. */ + ASSERT_EQ(1, read(pipe_parent[0], &buf_child, 1)); + _exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE); + return; + } + + ASSERT_EQ(0, close(pipe_child[1])); + ASSERT_EQ(0, close(pipe_parent[0])); + if (variant->domain_parent) + create_domain(_metadata); + + /* Signals that the parent is in a domain, if any. */ + ASSERT_EQ(1, write(pipe_parent[1], ".", 1)); + + /* + * Waits for the child to test PTRACE_ATTACH on the parent and start + * testing PTRACE_TRACEME. + */ + ASSERT_EQ(1, read(pipe_child[0], &buf_parent, 1)); + + /* Tests child PTRACE_TRACEME. */ + if (!variant->domain_parent) { + ASSERT_EQ(child, waitpid(child, &status, 0)); + ASSERT_EQ(1, WIFSTOPPED(status)); + ASSERT_EQ(0, ptrace(PTRACE_DETACH, child, NULL, 0)); + } else { + /* The child should not be traced by the parent. */ + EXPECT_EQ(-1, ptrace(PTRACE_DETACH, child, NULL, 0)); + EXPECT_EQ(ESRCH, errno); + } + + /* Tests PTRACE_ATTACH and PTRACE_MODE_READ on the child. */ + err_proc_read = test_ptrace_read(child); + ret = ptrace(PTRACE_ATTACH, child, NULL, 0); + if (variant->domain_parent) { + EXPECT_EQ(-1, ret); + EXPECT_EQ(EPERM, errno); + EXPECT_EQ(EACCES, err_proc_read); + } else { + EXPECT_EQ(0, ret); + EXPECT_EQ(0, err_proc_read); + } + if (ret == 0) { + ASSERT_EQ(child, waitpid(child, &status, 0)); + ASSERT_EQ(1, WIFSTOPPED(status)); + ASSERT_EQ(0, ptrace(PTRACE_DETACH, child, NULL, 0)); + } + + /* Signals that the parent PTRACE_ATTACH test is done. */ + ASSERT_EQ(1, write(pipe_parent[1], ".", 1)); + ASSERT_EQ(child, waitpid(child, &status, 0)); + if (WIFSIGNALED(status) || !WIFEXITED(status) || + WEXITSTATUS(status) != EXIT_SUCCESS) + _metadata->passed = 0; +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/landlock/true.c b/tools/testing/selftests/landlock/true.c new file mode 100644 index 000000000000..3f9ccbf52783 --- /dev/null +++ b/tools/testing/selftests/landlock/true.c @@ -0,0 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 +int main(void) +{ + return 0; +} diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c index c25cf7cd45e9..33ee34fc0828 100644 --- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c +++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c @@ -10,16 +10,7 @@ * * We create two sets of source and destination buffers, one in regular memory, * the other cache-inhibited (by default we use /dev/fb0 for this, but an - * alterative path for cache-inhibited memory may be provided). 
- * - * One way to get cache-inhibited memory is to use the "mem" kernel parameter - * to limit the kernel to less memory than actually exists. Addresses above - * the limit may still be accessed but will be treated as cache-inhibited. For - * example, if there is actually 4GB of memory and the parameter "mem=3GB" is - * used, memory from address 0xC0000000 onwards is treated as cache-inhibited. - * To access this region /dev/mem is used. The kernel should be configured - * without CONFIG_STRICT_DEVMEM. In this case use: - * ./alignment_handler /dev/mem 0xc0000000 + * alternative path for cache-inhibited memory may be provided, e.g. memtrace). * * We initialise the source buffers, then use whichever set of load/store * instructions is under test to copy bytes from the source buffers to the diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile index defe488d6bf1..40253abc6208 100644 --- a/tools/testing/selftests/powerpc/mm/Makefile +++ b/tools/testing/selftests/powerpc/mm/Makefile @@ -5,6 +5,7 @@ noarg: TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \ large_vm_fork_separation bad_accesses pkey_exec_prot \ pkey_siginfo stack_expansion_signal stack_expansion_ldst +TEST_PROGS := stress_code_patching.sh TEST_GEN_PROGS_EXTENDED := tlbie_test TEST_GEN_FILES := tempfile diff --git a/tools/testing/selftests/powerpc/mm/stress_code_patching.sh b/tools/testing/selftests/powerpc/mm/stress_code_patching.sh new file mode 100755 index 000000000000..e454509659f6 --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/stress_code_patching.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-or-later + +TIMEOUT=30 + +DEBUGFS_DIR=`cat /proc/mounts | grep debugfs | awk '{print $2}'` +if [ ! -e "$DEBUGFS_DIR" ] +then + echo "debugfs not found, skipping" 1>&2 + exit 4 +fi + +if [ ! -e "$DEBUGFS_DIR/tracing/current_tracer" ] +then + echo "Tracing files not found, skipping" 1>&2 + exit 4 +fi + + +echo "Testing for spurious faults when mapping kernel memory..." + +if grep -q "FUNCTION TRACING IS CORRUPTED" "$DEBUGFS_DIR/tracing/trace" +then + echo "FAILED: Ftrace already dead. 
 Probably due to a spurious fault" 1>&2 + exit 1 +fi + +dmesg -C +START_TIME=`date +%s` +END_TIME=`expr $START_TIME + $TIMEOUT` +while [ `date +%s` -lt $END_TIME ] +do + echo function > $DEBUGFS_DIR/tracing/current_tracer + echo nop > $DEBUGFS_DIR/tracing/current_tracer + if dmesg | grep -q 'ftrace bug' + then + break + fi +done + +echo nop > $DEBUGFS_DIR/tracing/current_tracer +if dmesg | grep -q 'ftrace bug' +then + echo "FAILED: Mapping kernel memory causes spurious faults" 1>&2 + exit 1 +else + echo "OK: Mapping kernel memory does not cause spurious faults" + exit 0 +fi diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c index 02dffb65de48..b099753b50e4 100644 --- a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c +++ b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c @@ -324,7 +324,7 @@ int compress_file(int argc, char **argv, void *handle) fprintf(stderr, "error: cannot progress; "); fprintf(stderr, "too many faults\n"); exit(-1); - }; + } } fault_tries = NX_MAX_FAULTS; /* Reset for the next chunk */ diff --git a/tools/testing/selftests/powerpc/ptrace/.gitignore b/tools/testing/selftests/powerpc/ptrace/.gitignore index 0e96150b7c7e..eb75e5360e31 100644 --- a/tools/testing/selftests/powerpc/ptrace/.gitignore +++ b/tools/testing/selftests/powerpc/ptrace/.gitignore @@ -14,3 +14,4 @@ perf-hwbreak core-pkey ptrace-pkey ptrace-syscall +ptrace-perf-hwbreak diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile index 8d3f006c98cc..a500639da97a 100644 --- a/tools/testing/selftests/powerpc/ptrace/Makefile +++ b/tools/testing/selftests/powerpc/ptrace/Makefile @@ -2,7 +2,7 @@ TEST_GEN_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \ ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx \ ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak ptrace-pkey core-pkey \ - perf-hwbreak ptrace-syscall + perf-hwbreak ptrace-syscall ptrace-perf-hwbreak top_srcdir = ../../../../.. 
include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c index c1f324afdbf3..ecde2c199f3b 100644 --- a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c @@ -21,8 +21,13 @@ #include <assert.h> #include <stdio.h> #include <stdlib.h> +#include <signal.h> #include <string.h> #include <sys/ioctl.h> +#include <sys/wait.h> +#include <sys/ptrace.h> +#include <sys/sysinfo.h> +#include <asm/ptrace.h> #include <elf.h> #include <pthread.h> #include <sys/syscall.h> @@ -30,32 +35,130 @@ #include <linux/hw_breakpoint.h> #include "utils.h" +#ifndef PPC_DEBUG_FEATURE_DATA_BP_ARCH_31 +#define PPC_DEBUG_FEATURE_DATA_BP_ARCH_31 0x20 +#endif + #define MAX_LOOPS 10000 #define DAWR_LENGTH_MAX ((0x3f + 1) * 8) -static inline int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid, - int cpu, int group_fd, - unsigned long flags) +int nprocs; + +static volatile int a = 10; +static volatile int b = 10; +static volatile char c[512 + 8] __attribute__((aligned(512))); + +static void perf_event_attr_set(struct perf_event_attr *attr, + __u32 type, __u64 addr, __u64 len, + bool exclude_user) { - attr->size = sizeof(*attr); - return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); + memset(attr, 0, sizeof(struct perf_event_attr)); + attr->type = PERF_TYPE_BREAKPOINT; + attr->size = sizeof(struct perf_event_attr); + attr->bp_type = type; + attr->bp_addr = addr; + attr->bp_len = len; + attr->exclude_kernel = 1; + attr->exclude_hv = 1; + attr->exclude_guest = 1; + attr->exclude_user = exclude_user; + attr->disabled = 1; } -static inline bool breakpoint_test(int len) +static int +perf_process_event_open_exclude_user(__u32 type, __u64 addr, __u64 len, bool exclude_user) +{ + struct perf_event_attr attr; + + perf_event_attr_set(&attr, type, addr, len, exclude_user); + return syscall(__NR_perf_event_open, &attr, getpid(), -1, -1, 0); +} + +static int perf_process_event_open(__u32 type, __u64 addr, __u64 len) +{ + struct perf_event_attr attr; + + perf_event_attr_set(&attr, type, addr, len, 0); + return syscall(__NR_perf_event_open, &attr, getpid(), -1, -1, 0); +} + +static int perf_cpu_event_open(long cpu, __u32 type, __u64 addr, __u64 len) { struct perf_event_attr attr; + + perf_event_attr_set(&attr, type, addr, len, 0); + return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0); +} + +static void close_fds(int *fd, int n) +{ + int i; + + for (i = 0; i < n; i++) + close(fd[i]); +} + +static unsigned long read_fds(int *fd, int n) +{ + int i; + unsigned long c = 0; + unsigned long count = 0; + size_t res; + + for (i = 0; i < n; i++) { + res = read(fd[i], &c, sizeof(c)); + assert(res == sizeof(unsigned long long)); + count += c; + } + return count; +} + +static void reset_fds(int *fd, int n) +{ + int i; + + for (i = 0; i < n; i++) + ioctl(fd[i], PERF_EVENT_IOC_RESET); +} + +static void enable_fds(int *fd, int n) +{ + int i; + + for (i = 0; i < n; i++) + ioctl(fd[i], PERF_EVENT_IOC_ENABLE); +} + +static void disable_fds(int *fd, int n) +{ + int i; + + for (i = 0; i < n; i++) + ioctl(fd[i], PERF_EVENT_IOC_DISABLE); +} + +static int perf_systemwide_event_open(int *fd, __u32 type, __u64 addr, __u64 len) +{ + int i = 0; + + /* Assume online processors are 0 to nprocs for simplicity */ + for (i = 0; i < nprocs; i++) { + fd[i] = perf_cpu_event_open(i, type, addr, len); + if (fd[i] < 0) { + close_fds(fd, i); + return fd[i]; + } + } + return 0; +} + +static inline 
bool breakpoint_test(int len) +{ int fd; - /* setup counters */ - memset(&attr, 0, sizeof(attr)); - attr.disabled = 1; - attr.type = PERF_TYPE_BREAKPOINT; - attr.bp_type = HW_BREAKPOINT_R; /* bp_addr can point anywhere but needs to be aligned */ - attr.bp_addr = (__u64)(&attr) & 0xfffffffffffff800; - attr.bp_len = len; - fd = sys_perf_event_open(&attr, 0, -1, -1, 0); + fd = perf_process_event_open(HW_BREAKPOINT_R, (__u64)(&fd) & 0xfffffffffffff800, len); if (fd < 0) return false; close(fd); @@ -75,7 +178,6 @@ static inline bool dawr_supported(void) static int runtestsingle(int readwriteflag, int exclude_user, int arraytest) { int i,j; - struct perf_event_attr attr; size_t res; unsigned long long breaks, needed; int readint; @@ -85,6 +187,7 @@ static int runtestsingle(int readwriteflag, int exclude_user, int arraytest) int break_fd; int loop_num = MAX_LOOPS - (rand() % 100); /* provide some variability */ volatile int *k; + __u64 len; /* align to 0x400 boundary as required by DAWR */ readintalign = (int *)(((unsigned long)readintarraybig + 0x7ff) & @@ -94,19 +197,11 @@ static int runtestsingle(int readwriteflag, int exclude_user, int arraytest) if (arraytest) ptr = &readintalign[0]; - /* setup counters */ - memset(&attr, 0, sizeof(attr)); - attr.disabled = 1; - attr.type = PERF_TYPE_BREAKPOINT; - attr.bp_type = readwriteflag; - attr.bp_addr = (__u64)ptr; - attr.bp_len = sizeof(int); - if (arraytest) - attr.bp_len = DAWR_LENGTH_MAX; - attr.exclude_user = exclude_user; - break_fd = sys_perf_event_open(&attr, 0, -1, -1, 0); + len = arraytest ? DAWR_LENGTH_MAX : sizeof(int); + break_fd = perf_process_event_open_exclude_user(readwriteflag, (__u64)ptr, + len, exclude_user); if (break_fd < 0) { - perror("sys_perf_event_open"); + perror("perf_process_event_open_exclude_user"); exit(1); } @@ -153,7 +248,6 @@ static int runtest_dar_outside(void) void *target; volatile __u16 temp16; volatile __u64 temp64; - struct perf_event_attr attr; int break_fd; unsigned long long breaks; int fail = 0; @@ -165,21 +259,11 @@ static int runtest_dar_outside(void) exit(EXIT_FAILURE); } - /* setup counters */ - memset(&attr, 0, sizeof(attr)); - attr.disabled = 1; - attr.type = PERF_TYPE_BREAKPOINT; - attr.exclude_kernel = 1; - attr.exclude_hv = 1; - attr.exclude_guest = 1; - attr.bp_type = HW_BREAKPOINT_RW; /* watch middle half of target array */ - attr.bp_addr = (__u64)(target + 2); - attr.bp_len = 4; - break_fd = sys_perf_event_open(&attr, 0, -1, -1, 0); + break_fd = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)(target + 2), 4); if (break_fd < 0) { free(target); - perror("sys_perf_event_open"); + perror("perf_process_event_open"); exit(EXIT_FAILURE); } @@ -263,11 +347,467 @@ static int runtest_dar_outside(void) return fail; } +static void multi_dawr_workload(void) +{ + a += 10; + b += 10; + c[512 + 1] += 'a'; +} + +static int test_process_multi_diff_addr(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int fd1, fd2; + char *desc = "Process specific, Two events, diff addr"; + size_t res; + + fd1 = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); + if (fd1 < 0) { + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + fd2 = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)&b, (__u64)sizeof(b)); + if (fd2 < 0) { + close(fd1); + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + ioctl(fd1, PERF_EVENT_IOC_RESET); + ioctl(fd2, PERF_EVENT_IOC_RESET); + ioctl(fd1, PERF_EVENT_IOC_ENABLE); + ioctl(fd2, PERF_EVENT_IOC_ENABLE); + multi_dawr_workload(); + ioctl(fd1, 
PERF_EVENT_IOC_DISABLE); + ioctl(fd2, PERF_EVENT_IOC_DISABLE); + + res = read(fd1, &breaks1, sizeof(breaks1)); + assert(res == sizeof(unsigned long long)); + res = read(fd2, &breaks2, sizeof(breaks2)); + assert(res == sizeof(unsigned long long)); + + close(fd1); + close(fd2); + + if (breaks1 != 2 || breaks2 != 2) { + printf("FAILED: %s: %lld != 2 || %lld != 2\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int test_process_multi_same_addr(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int fd1, fd2; + char *desc = "Process specific, Two events, same addr"; + size_t res; + + fd1 = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); + if (fd1 < 0) { + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + fd2 = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); + if (fd2 < 0) { + close(fd1); + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + ioctl(fd1, PERF_EVENT_IOC_RESET); + ioctl(fd2, PERF_EVENT_IOC_RESET); + ioctl(fd1, PERF_EVENT_IOC_ENABLE); + ioctl(fd2, PERF_EVENT_IOC_ENABLE); + multi_dawr_workload(); + ioctl(fd1, PERF_EVENT_IOC_DISABLE); + ioctl(fd2, PERF_EVENT_IOC_DISABLE); + + res = read(fd1, &breaks1, sizeof(breaks1)); + assert(res == sizeof(unsigned long long)); + res = read(fd2, &breaks2, sizeof(breaks2)); + assert(res == sizeof(unsigned long long)); + + close(fd1); + close(fd2); + + if (breaks1 != 2 || breaks2 != 2) { + printf("FAILED: %s: %lld != 2 || %lld != 2\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int test_process_multi_diff_addr_ro_wo(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int fd1, fd2; + char *desc = "Process specific, Two events, diff addr, one is RO, other is WO"; + size_t res; + + fd1 = perf_process_event_open(HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a)); + if (fd1 < 0) { + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + fd2 = perf_process_event_open(HW_BREAKPOINT_R, (__u64)&b, (__u64)sizeof(b)); + if (fd2 < 0) { + close(fd1); + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + ioctl(fd1, PERF_EVENT_IOC_RESET); + ioctl(fd2, PERF_EVENT_IOC_RESET); + ioctl(fd1, PERF_EVENT_IOC_ENABLE); + ioctl(fd2, PERF_EVENT_IOC_ENABLE); + multi_dawr_workload(); + ioctl(fd1, PERF_EVENT_IOC_DISABLE); + ioctl(fd2, PERF_EVENT_IOC_DISABLE); + + res = read(fd1, &breaks1, sizeof(breaks1)); + assert(res == sizeof(unsigned long long)); + res = read(fd2, &breaks2, sizeof(breaks2)); + assert(res == sizeof(unsigned long long)); + + close(fd1); + close(fd2); + + if (breaks1 != 1 || breaks2 != 1) { + printf("FAILED: %s: %lld != 1 || %lld != 1\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int test_process_multi_same_addr_ro_wo(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int fd1, fd2; + char *desc = "Process specific, Two events, same addr, one is RO, other is WO"; + size_t res; + + fd1 = perf_process_event_open(HW_BREAKPOINT_R, (__u64)&a, (__u64)sizeof(a)); + if (fd1 < 0) { + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + fd2 = perf_process_event_open(HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a)); + if (fd2 < 0) { + close(fd1); + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + ioctl(fd1, PERF_EVENT_IOC_RESET); + ioctl(fd2, PERF_EVENT_IOC_RESET); + ioctl(fd1, PERF_EVENT_IOC_ENABLE); + ioctl(fd2, PERF_EVENT_IOC_ENABLE); + multi_dawr_workload(); + 
ioctl(fd1, PERF_EVENT_IOC_DISABLE); + ioctl(fd2, PERF_EVENT_IOC_DISABLE); + + res = read(fd1, &breaks1, sizeof(breaks1)); + assert(res == sizeof(unsigned long long)); + res = read(fd2, &breaks2, sizeof(breaks2)); + assert(res == sizeof(unsigned long long)); + + close(fd1); + close(fd2); + + if (breaks1 != 1 || breaks2 != 1) { + printf("FAILED: %s: %lld != 1 || %lld != 1\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int test_syswide_multi_diff_addr(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int *fd1 = malloc(nprocs * sizeof(int)); + int *fd2 = malloc(nprocs * sizeof(int)); + char *desc = "Systemwide, Two events, diff addr"; + int ret; + + ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); + if (ret) { + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_RW, (__u64)&b, (__u64)sizeof(b)); + if (ret) { + close_fds(fd1, nprocs); + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + reset_fds(fd1, nprocs); + reset_fds(fd2, nprocs); + enable_fds(fd1, nprocs); + enable_fds(fd2, nprocs); + multi_dawr_workload(); + disable_fds(fd1, nprocs); + disable_fds(fd2, nprocs); + + breaks1 = read_fds(fd1, nprocs); + breaks2 = read_fds(fd2, nprocs); + + close_fds(fd1, nprocs); + close_fds(fd2, nprocs); + + free(fd1); + free(fd2); + + if (breaks1 != 2 || breaks2 != 2) { + printf("FAILED: %s: %lld != 2 || %lld != 2\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int test_syswide_multi_same_addr(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int *fd1 = malloc(nprocs * sizeof(int)); + int *fd2 = malloc(nprocs * sizeof(int)); + char *desc = "Systemwide, Two events, same addr"; + int ret; + + ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); + if (ret) { + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); + if (ret) { + close_fds(fd1, nprocs); + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + reset_fds(fd1, nprocs); + reset_fds(fd2, nprocs); + enable_fds(fd1, nprocs); + enable_fds(fd2, nprocs); + multi_dawr_workload(); + disable_fds(fd1, nprocs); + disable_fds(fd2, nprocs); + + breaks1 = read_fds(fd1, nprocs); + breaks2 = read_fds(fd2, nprocs); + + close_fds(fd1, nprocs); + close_fds(fd2, nprocs); + + free(fd1); + free(fd2); + + if (breaks1 != 2 || breaks2 != 2) { + printf("FAILED: %s: %lld != 2 || %lld != 2\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int test_syswide_multi_diff_addr_ro_wo(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int *fd1 = malloc(nprocs * sizeof(int)); + int *fd2 = malloc(nprocs * sizeof(int)); + char *desc = "Systemwide, Two events, diff addr, one is RO, other is WO"; + int ret; + + ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a)); + if (ret) { + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_R, (__u64)&b, (__u64)sizeof(b)); + if (ret) { + close_fds(fd1, nprocs); + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + reset_fds(fd1, nprocs); + reset_fds(fd2, nprocs); + enable_fds(fd1, nprocs); + enable_fds(fd2, nprocs); + multi_dawr_workload(); + disable_fds(fd1, nprocs); + disable_fds(fd2, 
nprocs); + + breaks1 = read_fds(fd1, nprocs); + breaks2 = read_fds(fd2, nprocs); + + close_fds(fd1, nprocs); + close_fds(fd2, nprocs); + + free(fd1); + free(fd2); + + if (breaks1 != 1 || breaks2 != 1) { + printf("FAILED: %s: %lld != 1 || %lld != 1\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int test_syswide_multi_same_addr_ro_wo(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int *fd1 = malloc(nprocs * sizeof(int)); + int *fd2 = malloc(nprocs * sizeof(int)); + char *desc = "Systemwide, Two events, same addr, one is RO, other is WO"; + int ret; + + ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a)); + if (ret) { + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_R, (__u64)&a, (__u64)sizeof(a)); + if (ret) { + close_fds(fd1, nprocs); + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + reset_fds(fd1, nprocs); + reset_fds(fd2, nprocs); + enable_fds(fd1, nprocs); + enable_fds(fd2, nprocs); + multi_dawr_workload(); + disable_fds(fd1, nprocs); + disable_fds(fd2, nprocs); + + breaks1 = read_fds(fd1, nprocs); + breaks2 = read_fds(fd2, nprocs); + + close_fds(fd1, nprocs); + close_fds(fd2, nprocs); + + free(fd1); + free(fd2); + + if (breaks1 != 1 || breaks2 != 1) { + printf("FAILED: %s: %lld != 1 || %lld != 1\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int runtest_multi_dawr(void) +{ + int ret = 0; + + ret |= test_process_multi_diff_addr(); + ret |= test_process_multi_same_addr(); + ret |= test_process_multi_diff_addr_ro_wo(); + ret |= test_process_multi_same_addr_ro_wo(); + ret |= test_syswide_multi_diff_addr(); + ret |= test_syswide_multi_same_addr(); + ret |= test_syswide_multi_diff_addr_ro_wo(); + ret |= test_syswide_multi_same_addr_ro_wo(); + + return ret; +} + +static int runtest_unaligned_512bytes(void) +{ + unsigned long long breaks = 0; + int fd; + char *desc = "Process specific, 512 bytes, unaligned"; + __u64 addr = (__u64)&c + 8; + size_t res; + + fd = perf_process_event_open(HW_BREAKPOINT_RW, addr, 512); + if (fd < 0) { + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + ioctl(fd, PERF_EVENT_IOC_RESET); + ioctl(fd, PERF_EVENT_IOC_ENABLE); + multi_dawr_workload(); + ioctl(fd, PERF_EVENT_IOC_DISABLE); + + res = read(fd, &breaks, sizeof(breaks)); + assert(res == sizeof(unsigned long long)); + + close(fd); + + if (breaks != 2) { + printf("FAILED: %s: %lld != 2\n", desc, breaks); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +/* There is no perf api to find number of available watchpoints. Use ptrace. 
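+ * The forked child below stops itself so that the parent can query
+ * PPC_PTRACE_GETHWDBGINFO against it: dbginfo.num_data_bps gives the
+ * watchpoint count, and dbginfo.features tells whether the ISA v3.1
+ * (second DAWR) feature is present.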
*/ +static int get_nr_wps(bool *arch_31) +{ + struct ppc_debug_info dbginfo; + int child_pid; + + child_pid = fork(); + if (!child_pid) { + int ret = ptrace(PTRACE_TRACEME, 0, NULL, 0); + if (ret) { + perror("PTRACE_TRACEME failed\n"); + exit(EXIT_FAILURE); + } + kill(getpid(), SIGUSR1); + + sleep(1); + exit(EXIT_SUCCESS); + } + + wait(NULL); + if (ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, &dbginfo)) { + perror("Can't get breakpoint info"); + exit(EXIT_FAILURE); + } + + *arch_31 = !!(dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_ARCH_31); + return dbginfo.num_data_bps; +} + static int runtest(void) { int rwflag; int exclude_user; int ret; + bool dawr = dawr_supported(); + bool arch_31 = false; + int nr_wps = get_nr_wps(&arch_31); /* * perf defines rwflag as two bits read and write and at least @@ -280,7 +820,7 @@ static int runtest(void) return ret; /* if we have the dawr, we can do an array test */ - if (!dawr_supported()) + if (!dawr) continue; ret = runtestsingle(rwflag, exclude_user, 1); if (ret) @@ -289,6 +829,19 @@ static int runtest(void) } ret = runtest_dar_outside(); + if (ret) + return ret; + + if (dawr && nr_wps > 1) { + nprocs = get_nprocs(); + ret = runtest_multi_dawr(); + if (ret) + return ret; + } + + if (dawr && arch_31) + ret = runtest_unaligned_512bytes(); + return ret; } diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c index 2e0d86e0687e..a0635a3819aa 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c @@ -194,6 +194,18 @@ static void test_workload(void) big_var[rand() % DAWR_MAX_LEN] = 'a'; else cvar = big_var[rand() % DAWR_MAX_LEN]; + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW ALIGNED, WO test */ + gstruct.a[rand() % A_LEN] = 'a'; + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW UNALIGNED, RO test */ + cvar = gstruct.b[rand() % B_LEN]; + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap, WO test */ + gstruct.a[rand() % A_LEN] = 'a'; + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap, RO test */ + cvar = gstruct.a[rand() % A_LEN]; } static void check_success(pid_t child_pid, const char *name, const char *type, @@ -417,6 +429,69 @@ static void test_sethwdebug_range_aligned(pid_t child_pid) ptrace_delhwdebug(child_pid, wh); } +static void test_multi_sethwdebug_range(pid_t child_pid) +{ + struct ppc_hw_breakpoint info1, info2; + unsigned long wp_addr1, wp_addr2; + char *name1 = "PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW ALIGNED"; + char *name2 = "PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW UNALIGNED"; + int len1, len2; + int wh1, wh2; + + wp_addr1 = (unsigned long)&gstruct.a; + wp_addr2 = (unsigned long)&gstruct.b; + len1 = A_LEN; + len2 = B_LEN; + get_ppc_hw_breakpoint(&info1, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr1, len1); + get_ppc_hw_breakpoint(&info2, PPC_BREAKPOINT_TRIGGER_READ, wp_addr2, len2); + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW ALIGNED, WO test */ + wh1 = ptrace_sethwdebug(child_pid, &info1); + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW UNALIGNED, RO test */ + wh2 = ptrace_sethwdebug(child_pid, &info2); + + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name1, "WO", wp_addr1, len1); + + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name2, "RO", wp_addr2, len2); + + ptrace_delhwdebug(child_pid, wh1); + ptrace_delhwdebug(child_pid, wh2); +} + +static void test_multi_sethwdebug_range_dawr_overlap(pid_t child_pid) +{ + struct ppc_hw_breakpoint 
 info1, info2; + unsigned long wp_addr1, wp_addr2; + char *name = "PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap"; + int len1, len2; + int wh1, wh2; + + wp_addr1 = (unsigned long)&gstruct.a; + wp_addr2 = (unsigned long)&gstruct.a; + len1 = A_LEN; + len2 = A_LEN; + get_ppc_hw_breakpoint(&info1, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr1, len1); + get_ppc_hw_breakpoint(&info2, PPC_BREAKPOINT_TRIGGER_READ, wp_addr2, len2); + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap, WO test */ + wh1 = ptrace_sethwdebug(child_pid, &info1); + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap, RO test */ + wh2 = ptrace_sethwdebug(child_pid, &info2); + + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "WO", wp_addr1, len1); + + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "RO", wp_addr2, len2); + + ptrace_delhwdebug(child_pid, wh1); + ptrace_delhwdebug(child_pid, wh2); +} + static void test_sethwdebug_range_unaligned(pid_t child_pid) { struct ppc_hw_breakpoint info; @@ -504,6 +579,10 @@ run_tests(pid_t child_pid, struct ppc_debug_info *dbginfo, bool dawr) test_sethwdebug_range_unaligned(child_pid); test_sethwdebug_range_unaligned_dar(child_pid); test_sethwdebug_dawr_max_range(child_pid); + if (dbginfo->num_data_bps > 1) { + test_multi_sethwdebug_range(child_pid); + test_multi_sethwdebug_range_dawr_overlap(child_pid); + } } } } diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c new file mode 100644 index 000000000000..3344e74a97b4 --- /dev/null +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c @@ -0,0 +1,659 @@ +// SPDX-License-Identifier: GPL-2.0+ +#include <stdio.h> +#include <string.h> +#include <signal.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <linux/hw_breakpoint.h> +#include <linux/perf_event.h> +#include <asm/unistd.h> +#include <sys/ptrace.h> +#include <sys/wait.h> +#include "ptrace.h" + +char data[16]; + +/* Overlapping address range */ +volatile __u64 *ptrace_data1 = (__u64 *)&data[0]; +volatile __u64 *perf_data1 = (__u64 *)&data[4]; + +/* Non-overlapping address range */ +volatile __u64 *ptrace_data2 = (__u64 *)&data[0]; +volatile __u64 *perf_data2 = (__u64 *)&data[8]; + +static unsigned long pid_max_addr(void) +{ + FILE *fp; + char *line = NULL, *c; + char addr[100]; + size_t len = 0; + + fp = fopen("/proc/kallsyms", "r"); + if (!fp) { + printf("Failed to read /proc/kallsyms. Exiting..\n"); + exit(EXIT_FAILURE); + } + + while (getline(&line, &len, fp) != -1) { + if (!strstr(line, "pid_max") || strstr(line, "pid_max_max") || + strstr(line, "pid_max_min")) + continue; + + strncpy(addr, line, len < 100 ? len : 100); + c = strchr(addr, ' '); + *c = '\0'; + fclose(fp); + return strtoul(addr, &c, 16); + } + fclose(fp); + printf("Could not find pid_max. 
Exiting..\n"); + exit(EXIT_FAILURE); + return -1; +} + +static void perf_user_event_attr_set(struct perf_event_attr *attr, __u64 addr, __u64 len) +{ + memset(attr, 0, sizeof(struct perf_event_attr)); + attr->type = PERF_TYPE_BREAKPOINT; + attr->size = sizeof(struct perf_event_attr); + attr->bp_type = HW_BREAKPOINT_R; + attr->bp_addr = addr; + attr->bp_len = len; + attr->exclude_kernel = 1; + attr->exclude_hv = 1; +} + +static void perf_kernel_event_attr_set(struct perf_event_attr *attr) +{ + memset(attr, 0, sizeof(struct perf_event_attr)); + attr->type = PERF_TYPE_BREAKPOINT; + attr->size = sizeof(struct perf_event_attr); + attr->bp_type = HW_BREAKPOINT_R; + attr->bp_addr = pid_max_addr(); + attr->bp_len = sizeof(unsigned long); + attr->exclude_user = 1; + attr->exclude_hv = 1; +} + +static int perf_cpu_event_open(int cpu, __u64 addr, __u64 len) +{ + struct perf_event_attr attr; + + perf_user_event_attr_set(&attr, addr, len); + return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0); +} + +static int perf_thread_event_open(pid_t child_pid, __u64 addr, __u64 len) +{ + struct perf_event_attr attr; + + perf_user_event_attr_set(&attr, addr, len); + return syscall(__NR_perf_event_open, &attr, child_pid, -1, -1, 0); +} + +static int perf_thread_cpu_event_open(pid_t child_pid, int cpu, __u64 addr, __u64 len) +{ + struct perf_event_attr attr; + + perf_user_event_attr_set(&attr, addr, len); + return syscall(__NR_perf_event_open, &attr, child_pid, cpu, -1, 0); +} + +static int perf_thread_kernel_event_open(pid_t child_pid) +{ + struct perf_event_attr attr; + + perf_kernel_event_attr_set(&attr); + return syscall(__NR_perf_event_open, &attr, child_pid, -1, -1, 0); +} + +static int perf_cpu_kernel_event_open(int cpu) +{ + struct perf_event_attr attr; + + perf_kernel_event_attr_set(&attr); + return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0); +} + +static int child(void) +{ + int ret; + + ret = ptrace(PTRACE_TRACEME, 0, NULL, 0); + if (ret) { + printf("Error: PTRACE_TRACEME failed\n"); + return 0; + } + kill(getpid(), SIGUSR1); /* --> parent (SIGUSR1) */ + + return 0; +} + +static void ptrace_ppc_hw_breakpoint(struct ppc_hw_breakpoint *info, int type, + __u64 addr, int len) +{ + info->version = 1; + info->trigger_type = type; + info->condition_mode = PPC_BREAKPOINT_CONDITION_NONE; + info->addr = addr; + info->addr2 = addr + len; + info->condition_value = 0; + if (!len) + info->addr_mode = PPC_BREAKPOINT_MODE_EXACT; + else + info->addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE; +} + +static int ptrace_open(pid_t child_pid, __u64 wp_addr, int len) +{ + struct ppc_hw_breakpoint info; + + ptrace_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_RW, wp_addr, len); + return ptrace(PPC_PTRACE_SETHWDEBUG, child_pid, 0, &info); +} + +static int test1(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread event by ptrace) + * if (existing cpu event by perf) + * if (addr range overlaps) + * fail; + */ + + perf_fd = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1)); + if (perf_fd < 0) + return -1; + + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd > 0 || errno != ENOSPC) + ret = -1; + + close(perf_fd); + return ret; +} + +static int test2(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread event by ptrace) + * if (existing cpu event by perf) + * if (addr range does not overlaps) + * allow; + */ + + perf_fd = perf_cpu_event_open(0, 
 (__u64)perf_data2, sizeof(*perf_data2)); + if (perf_fd < 0) + return -1; + + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); + if (ptrace_fd < 0) { + ret = -1; + goto perf_close; + } + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + +perf_close: + close(perf_fd); + return ret; +} + +static int test3(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread event by ptrace) + * if (existing thread event by perf on the same thread) + * if (addr range overlaps) + * fail; + */ + perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data1, + sizeof(*perf_data1)); + if (perf_fd < 0) + return -1; + + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd > 0 || errno != ENOSPC) + ret = -1; + + close(perf_fd); + return ret; +} + +static int test4(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread event by ptrace) + * if (existing thread event by perf on the same thread) + * if (addr range does not overlap) + * allow; + */ + perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data2, + sizeof(*perf_data2)); + if (perf_fd < 0) + return -1; + + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); + if (ptrace_fd < 0) { + ret = -1; + goto perf_close; + } + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + +perf_close: + close(perf_fd); + return ret; +} + +static int test5(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int cpid; + int ret = 0; + + /* Test: + * if (new per thread event by ptrace) + * if (existing thread event by perf on a different thread) + * allow; + */ + cpid = fork(); + if (!cpid) { + /* Temporary Child */ + pause(); + exit(EXIT_SUCCESS); + } + + perf_fd = perf_thread_event_open(cpid, (__u64)perf_data1, sizeof(*perf_data1)); + if (perf_fd < 0) { + ret = -1; + goto kill_child; + } + + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd < 0) { + ret = -1; + goto perf_close; + } + + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); +perf_close: + close(perf_fd); +kill_child: + kill(cpid, SIGINT); + return ret; +} + +static int test6(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread kernel event by perf) + * if (existing thread event by ptrace on the same thread) + * allow; + * -- OR -- + * if (new per cpu kernel event by perf) + * if (existing thread event by ptrace) + * allow; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd < 0) + return -1; + + perf_fd = perf_thread_kernel_event_open(child_pid); + if (perf_fd < 0) { + ret = -1; + goto ptrace_close; + } + close(perf_fd); + + perf_fd = perf_cpu_kernel_event_open(0); + if (perf_fd < 0) { + ret = -1; + goto ptrace_close; + } + close(perf_fd); + +ptrace_close: + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int test7(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread event by perf) + * if (existing thread event by ptrace on the same thread) + * if (addr range overlaps) + * fail; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd < 0) + return -1; + + perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data1, + sizeof(*perf_data1)); + if (perf_fd > 0 || errno != ENOSPC) + ret = -1; + + ptrace(PPC_PTRACE_DELHWDEBUG, 
 child_pid, 0, ptrace_fd); + return ret; +} + +static int test8(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread event by perf) + * if (existing thread event by ptrace on the same thread) + * if (addr range does not overlap) + * allow; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); + if (ptrace_fd < 0) + return -1; + + perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data2, + sizeof(*perf_data2)); + if (perf_fd < 0) { + ret = -1; + goto ptrace_close; + } + close(perf_fd); + +ptrace_close: + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int test9(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int cpid; + int ret = 0; + + /* Test: + * if (new per thread event by perf) + * if (existing thread event by ptrace on the other thread) + * allow; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd < 0) + return -1; + + cpid = fork(); + if (!cpid) { + /* Temporary Child */ + pause(); + exit(EXIT_SUCCESS); + } + + perf_fd = perf_thread_event_open(cpid, (__u64)perf_data1, sizeof(*perf_data1)); + if (perf_fd < 0) { + ret = -1; + goto kill_child; + } + close(perf_fd); + +kill_child: + kill(cpid, SIGINT); + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int test10(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per cpu event by perf) + * if (existing thread event by ptrace on the same thread) + * if (addr range overlaps) + * fail; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd < 0) + return -1; + + perf_fd = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1)); + if (perf_fd > 0 || errno != ENOSPC) + ret = -1; + + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int test11(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per cpu event by perf) + * if (existing thread event by ptrace on the same thread) + * if (addr range does not overlap) + * allow; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); + if (ptrace_fd < 0) + return -1; + + perf_fd = perf_cpu_event_open(0, (__u64)perf_data2, sizeof(*perf_data2)); + if (perf_fd < 0) { + ret = -1; + goto ptrace_close; + } + close(perf_fd); + +ptrace_close: + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int test12(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread and per cpu event by perf) + * if (existing thread event by ptrace on the same thread) + * if (addr range overlaps) + * fail; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd < 0) + return -1; + + perf_fd = perf_thread_cpu_event_open(child_pid, 0, (__u64)perf_data1, sizeof(*perf_data1)); + if (perf_fd > 0 || errno != ENOSPC) + ret = -1; + + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int test13(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread and per cpu event by perf) + * if (existing thread event by ptrace on the same thread) + * if (addr range does not overlap) + * allow; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); + if (ptrace_fd < 0) + return -1; + + perf_fd = 
+	if (perf_fd < 0) {
+		ret = -1;
+		goto ptrace_close;
+	}
+	close(perf_fd);
+
+ptrace_close:
+	ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
+	return ret;
+}
+
+static int test14(pid_t child_pid)
+{
+	int perf_fd;
+	int ptrace_fd;
+	int cpid;
+	int ret = 0;
+
+	/* Test:
+	 * if (new per thread and per cpu event by perf)
+	 *	if (existing thread event by ptrace on the other thread)
+	 *		allow;
+	 */
+	ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1));
+	if (ptrace_fd < 0)
+		return -1;
+
+	cpid = fork();
+	if (!cpid) {
+		/* Temporary Child */
+		pause();
+		exit(EXIT_SUCCESS);
+	}
+
+	perf_fd = perf_thread_cpu_event_open(cpid, 0, (__u64)perf_data1,
+					     sizeof(*perf_data1));
+	if (perf_fd < 0) {
+		ret = -1;
+		goto kill_child;
+	}
+	close(perf_fd);
+
+kill_child:
+	kill(cpid, SIGINT);
+	ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
+	return ret;
+}
+
+static int do_test(const char *msg, int (*fun)(pid_t arg), pid_t arg)
+{
+	int ret;
+
+	ret = fun(arg);
+	if (ret)
+		printf("%s: Error\n", msg);
+	else
+		printf("%s: Ok\n", msg);
+	return ret;
+}
+
+char *desc[14] = {
+	"perf cpu event -> ptrace thread event (Overlapping)",
+	"perf cpu event -> ptrace thread event (Non-overlapping)",
+	"perf thread event -> ptrace same thread event (Overlapping)",
+	"perf thread event -> ptrace same thread event (Non-overlapping)",
+	"perf thread event -> ptrace other thread event",
+	"ptrace thread event -> perf kernel event",
+	"ptrace thread event -> perf same thread event (Overlapping)",
+	"ptrace thread event -> perf same thread event (Non-overlapping)",
+	"ptrace thread event -> perf other thread event",
+	"ptrace thread event -> perf cpu event (Overlapping)",
+	"ptrace thread event -> perf cpu event (Non-overlapping)",
+	"ptrace thread event -> perf same thread & cpu event (Overlapping)",
+	"ptrace thread event -> perf same thread & cpu event (Non-overlapping)",
+	"ptrace thread event -> perf other thread & cpu event",
+};
+
+static int test(pid_t child_pid)
+{
+	int ret = TEST_PASS;
+
+	ret |= do_test(desc[0], test1, child_pid);
+	ret |= do_test(desc[1], test2, child_pid);
+	ret |= do_test(desc[2], test3, child_pid);
+	ret |= do_test(desc[3], test4, child_pid);
+	ret |= do_test(desc[4], test5, child_pid);
+	ret |= do_test(desc[5], test6, child_pid);
+	ret |= do_test(desc[6], test7, child_pid);
+	ret |= do_test(desc[7], test8, child_pid);
+	ret |= do_test(desc[8], test9, child_pid);
+	ret |= do_test(desc[9], test10, child_pid);
+	ret |= do_test(desc[10], test11, child_pid);
+	ret |= do_test(desc[11], test12, child_pid);
+	ret |= do_test(desc[12], test13, child_pid);
+	ret |= do_test(desc[13], test14, child_pid);
+
+	return ret;
+}
+
+static void get_dbginfo(pid_t child_pid, struct ppc_debug_info *dbginfo)
+{
+	if (ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, dbginfo)) {
+		perror("Can't get breakpoint info");
+		exit(-1);
+	}
+}
+
+static int ptrace_perf_hwbreak(void)
+{
+	int ret;
+	pid_t child_pid;
+	struct ppc_debug_info dbginfo;
+
+	child_pid = fork();
+	if (!child_pid)
+		return child();
+
+	/* parent */
+	wait(NULL); /* <-- child (SIGUSR1) */
+
+	get_dbginfo(child_pid, &dbginfo);
+	SKIP_IF(dbginfo.num_data_bps <= 1);
+
+	ret = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1));
+	SKIP_IF(ret < 0);
+	close(ret);
+
+	ret = test(child_pid);
+
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+	return ret;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(ptrace_perf_hwbreak, "ptrace-perf-hwbreak");
+}
diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile
index f25e854fe370..844d18cd5f93 100644
--- a/tools/testing/selftests/powerpc/security/Makefile
+++ b/tools/testing/selftests/powerpc/security/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0+
 
-TEST_GEN_PROGS := rfi_flush entry_flush spectre_v2
+TEST_GEN_PROGS := rfi_flush entry_flush uaccess_flush spectre_v2
 
 top_srcdir = ../../../../..
 
 CFLAGS += -I../../../../../usr/include
@@ -13,3 +13,4 @@ $(OUTPUT)/spectre_v2: CFLAGS += -m64
 $(OUTPUT)/spectre_v2: ../pmu/event.c branch_loops.S
 $(OUTPUT)/rfi_flush: flush_utils.c
 $(OUTPUT)/entry_flush: flush_utils.c
+$(OUTPUT)/uaccess_flush: flush_utils.c
diff --git a/tools/testing/selftests/powerpc/security/entry_flush.c b/tools/testing/selftests/powerpc/security/entry_flush.c
index 78cf914fa321..68ce377b205e 100644
--- a/tools/testing/selftests/powerpc/security/entry_flush.c
+++ b/tools/testing/selftests/powerpc/security/entry_flush.c
@@ -53,7 +53,7 @@ int entry_flush_test(void)
 
 	entry_flush = entry_flush_orig;
 
-	fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1);
+	fd = perf_event_open_counter(PERF_TYPE_HW_CACHE, PERF_L1D_READ_MISS_CONFIG, -1);
 	FAIL_IF(fd < 0);
 
 	p = (char *)memalign(zero_size, CACHELINE_SIZE);
diff --git a/tools/testing/selftests/powerpc/security/flush_utils.c b/tools/testing/selftests/powerpc/security/flush_utils.c
index 0c3c4c40c7fb..4d95965cb751 100644
--- a/tools/testing/selftests/powerpc/security/flush_utils.c
+++ b/tools/testing/selftests/powerpc/security/flush_utils.c
@@ -13,6 +13,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
+#include <sys/utsname.h>
 #include "utils.h"
 #include "flush_utils.h"
 
@@ -35,6 +36,18 @@ void syscall_loop(char *p, unsigned long iterations,
 	}
 }
 
+void syscall_loop_uaccess(char *p, unsigned long iterations,
+			  unsigned long zero_size)
+{
+	struct utsname utsname;
+
+	for (unsigned long i = 0; i < iterations; i++) {
+		for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE)
+			load(p + j);
+		uname(&utsname);
+	}
+}
+
 static void sigill_handler(int signr, siginfo_t *info, void *unused)
 {
 	static int warned;
diff --git a/tools/testing/selftests/powerpc/security/flush_utils.h b/tools/testing/selftests/powerpc/security/flush_utils.h
index 07a5eb301466..e1e68281f7ac 100644
--- a/tools/testing/selftests/powerpc/security/flush_utils.h
+++ b/tools/testing/selftests/powerpc/security/flush_utils.h
@@ -9,9 +9,16 @@
 
 #define CACHELINE_SIZE 128
 
+#define PERF_L1D_READ_MISS_CONFIG	((PERF_COUNT_HW_CACHE_L1D) |		\
+					 (PERF_COUNT_HW_CACHE_OP_READ << 8) |	\
+					 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))
+
 void syscall_loop(char *p, unsigned long iterations,
 		  unsigned long zero_size);
 
+void syscall_loop_uaccess(char *p, unsigned long iterations,
+			  unsigned long zero_size);
+
 void set_dscr(unsigned long val);
 
 #endif /* _SELFTESTS_POWERPC_SECURITY_FLUSH_UTILS_H */
diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c
index 7565fd786640..f73484a6470f 100644
--- a/tools/testing/selftests/powerpc/security/rfi_flush.c
+++ b/tools/testing/selftests/powerpc/security/rfi_flush.c
@@ -54,7 +54,7 @@ int rfi_flush_test(void)
 
 	rfi_flush = rfi_flush_orig;
 
-	fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1);
+	fd = perf_event_open_counter(PERF_TYPE_HW_CACHE, PERF_L1D_READ_MISS_CONFIG, -1);
 	FAIL_IF(fd < 0);
 
 	p = (char *)memalign(zero_size, CACHELINE_SIZE);
diff --git a/tools/testing/selftests/powerpc/security/uaccess_flush.c b/tools/testing/selftests/powerpc/security/uaccess_flush.c
new file mode 100644
index 000000000000..cf80f960e38a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/security/uaccess_flush.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Copyright 2018 IBM Corporation.
+ * Copyright 2020 Canonical Ltd.
+ */
+
+#define __SANE_USERSPACE_TYPES__
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <malloc.h>
+#include <unistd.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "utils.h"
+#include "flush_utils.h"
+
+int uaccess_flush_test(void)
+{
+	char *p;
+	int repetitions = 10;
+	int fd, passes = 0, iter, rc = 0;
+	struct perf_event_read v;
+	__u64 l1d_misses_total = 0;
+	unsigned long iterations = 100000, zero_size = 24 * 1024;
+	unsigned long l1d_misses_expected;
+	int rfi_flush_orig;
+	int entry_flush_orig;
+	int uaccess_flush, uaccess_flush_orig;
+
+	SKIP_IF(geteuid() != 0);
+
+	// The PMU event we use only works on Power7 or later
+	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
+
+	if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_orig) < 0) {
+		perror("Unable to read powerpc/rfi_flush debugfs file");
+		SKIP_IF(1);
+	}
+
+	if (read_debugfs_file("powerpc/entry_flush", &entry_flush_orig) < 0) {
+		perror("Unable to read powerpc/entry_flush debugfs file");
+		SKIP_IF(1);
+	}
+
+	if (read_debugfs_file("powerpc/uaccess_flush", &uaccess_flush_orig) < 0) {
+		perror("Unable to read powerpc/uaccess_flush debugfs file");
+		SKIP_IF(1);
+	}
+
+	if (rfi_flush_orig != 0) {
+		if (write_debugfs_file("powerpc/rfi_flush", 0) < 0) {
+			perror("error writing to powerpc/rfi_flush debugfs file");
+			FAIL_IF(1);
+		}
+	}
+
+	if (entry_flush_orig != 0) {
+		if (write_debugfs_file("powerpc/entry_flush", 0) < 0) {
+			perror("error writing to powerpc/entry_flush debugfs file");
+			FAIL_IF(1);
+		}
+	}
+
+	uaccess_flush = uaccess_flush_orig;
+
+	fd = perf_event_open_counter(PERF_TYPE_HW_CACHE, PERF_L1D_READ_MISS_CONFIG, -1);
+	FAIL_IF(fd < 0);
+
+	p = (char *)memalign(zero_size, CACHELINE_SIZE);
+
+	FAIL_IF(perf_event_enable(fd));
+
+	// disable L1 prefetching
+	set_dscr(1);
+
+	iter = repetitions;
+
+	/*
+	 * We expect to see an l1d miss for each cacheline access when
+	 * uaccess_flush is set. Allow a small variation on this.
+	 */
+	l1d_misses_expected = iterations * (zero_size / CACHELINE_SIZE - 2);
+
+again:
+	FAIL_IF(perf_event_reset(fd));
+
+	syscall_loop_uaccess(p, iterations, zero_size);
+
+	FAIL_IF(read(fd, &v, sizeof(v)) != sizeof(v));
+
+	if (uaccess_flush && v.l1d_misses >= l1d_misses_expected)
+		passes++;
+	else if (!uaccess_flush && v.l1d_misses < (l1d_misses_expected / 2))
+		passes++;
+
+	l1d_misses_total += v.l1d_misses;
+
+	while (--iter)
+		goto again;
+
+	if (passes < repetitions) {
+		printf("FAIL (L1D misses with uaccess_flush=%d: %llu %c %lu) [%d/%d failures]\n",
+		       uaccess_flush, l1d_misses_total, uaccess_flush ? '<' : '>',
+		       uaccess_flush ? repetitions * l1d_misses_expected :
+		       repetitions * l1d_misses_expected / 2,
+		       repetitions - passes, repetitions);
+		rc = 1;
+	} else {
+		printf("PASS (L1D misses with uaccess_flush=%d: %llu %c %lu) [%d/%d pass]\n",
+		       uaccess_flush, l1d_misses_total, uaccess_flush ? '>' : '<',
+		       uaccess_flush ? repetitions * l1d_misses_expected :
+		       repetitions * l1d_misses_expected / 2,
+		       passes, repetitions);
+	}
+
+	if (uaccess_flush == uaccess_flush_orig) {
+		uaccess_flush = !uaccess_flush_orig;
+		if (write_debugfs_file("powerpc/uaccess_flush", uaccess_flush) < 0) {
+			perror("error writing to powerpc/uaccess_flush debugfs file");
+			return 1;
+		}
+		iter = repetitions;
+		l1d_misses_total = 0;
+		passes = 0;
+		goto again;
+	}
+
+	perf_event_disable(fd);
+	close(fd);
+
+	set_dscr(0);
+
+	if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_orig) < 0) {
+		perror("unable to restore original value of powerpc/rfi_flush debugfs file");
+		return 1;
+	}
+
+	if (write_debugfs_file("powerpc/entry_flush", entry_flush_orig) < 0) {
+		perror("unable to restore original value of powerpc/entry_flush debugfs file");
+		return 1;
+	}
+
+	if (write_debugfs_file("powerpc/uaccess_flush", uaccess_flush_orig) < 0) {
+		perror("unable to restore original value of powerpc/uaccess_flush debugfs file");
+		return 1;
+	}
+
+	return rc;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(uaccess_flush_test, "uaccess_flush_test");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-trap.c b/tools/testing/selftests/powerpc/tm/tm-trap.c
index c75960af8018..11521077f915 100644
--- a/tools/testing/selftests/powerpc/tm/tm-trap.c
+++ b/tools/testing/selftests/powerpc/tm/tm-trap.c
@@ -66,7 +66,7 @@ void trap_signal_handler(int signo, siginfo_t *si, void *uc)
 	/* Get thread endianness: extract bit LE from MSR */
 	thread_endianness = MSR_LE & ucp->uc_mcontext.gp_regs[PT_MSR];
 
-	/***
+	/*
 	 * Little-Endian Machine
 	 */
 
@@ -126,7 +126,7 @@ void trap_signal_handler(int signo, siginfo_t *si, void *uc)
 		}
 	}
 
-	/***
+	/*
 	 * Big-Endian Machine
 	 */
 
diff --git a/tools/testing/selftests/timens/gettime_perf.c b/tools/testing/selftests/timens/gettime_perf.c
index 7bf841a3967b..6b13dc277724 100644
--- a/tools/testing/selftests/timens/gettime_perf.c
+++ b/tools/testing/selftests/timens/gettime_perf.c
@@ -25,6 +25,12 @@ static void fill_function_pointers(void)
 	if (!vdso)
 		vdso = dlopen("linux-gate.so.1",
 			      RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+	if (!vdso)
+		vdso = dlopen("linux-vdso32.so.1",
+			      RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+	if (!vdso)
+		vdso = dlopen("linux-vdso64.so.1",
+			      RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
 	if (!vdso) {
 		pr_err("[WARN]\tfailed to find vDSO\n");
 		return;
@@ -32,6 +38,8 @@ static void fill_function_pointers(void)
 
 	vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
 	if (!vdso_clock_gettime)
+		vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__kernel_clock_gettime");
+	if (!vdso_clock_gettime)
 		pr_err("Warning: failed to find clock_gettime in vDSO\n");
 }
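
A note on the perf side of the ptrace-perf-hwbreak tests above: the
perf_thread_event_open()/perf_cpu_event_open() helpers are defined earlier
in that file, outside the hunks shown here. A minimal sketch of the kind of
PERF_TYPE_BREAKPOINT event such a helper would open is below; the function
name and the read+write bp_type are assumptions for illustration, not the
selftest's actual helper.

#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>

/* Hypothetical sketch: open a hardware watchpoint on [addr, addr + len).
 * pid >= 0 with cpu == -1 gives a per-thread event; pid == -1 with
 * cpu >= 0 gives a per-cpu event, matching the two helper flavours. */
static int open_hw_watchpoint(pid_t pid, int cpu, __u64 addr, __u64 len)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_BREAKPOINT;
	attr.bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W;
	attr.bp_addr = addr;
	attr.bp_len = len;
	attr.exclude_kernel = 1;
	attr.exclude_hv = 1;

	return syscall(SYS_perf_event_open, &attr, pid, cpu, -1, 0);
}

Where the test comments say "fail", the tests check for ENOSPC, i.e. that
is the error they treat as the expected rejection when a new breakpoint
event cannot coexist with an existing one.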
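
Similarly, for the L1D read-miss counter the flush tests switch to:
PERF_L1D_READ_MISS_CONFIG packs the generic cache-event triple (cache id,
op, result) into a PERF_TYPE_HW_CACHE config value. The selftests open it
through their perf_event_open_counter() helper (defined outside these
hunks); a hypothetical standalone equivalent, shown only to illustrate
what the macro selects, might look like:

#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

/* Hypothetical stand-in for perf_event_open_counter(): count L1D read
 * misses for the calling thread, on whichever cpu it runs. */
static int open_l1d_read_miss_counter(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HW_CACHE;
	/* Same composition as PERF_L1D_READ_MISS_CONFIG:
	 * cache id | (op << 8) | (result << 16) */
	attr.config = PERF_COUNT_HW_CACHE_L1D |
		      (PERF_COUNT_HW_CACHE_OP_READ << 8) |
		      (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);

	return syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
}

Using the generic PERF_TYPE_HW_CACHE event rather than the raw 0x400f0
code lets the kernel map the request to the appropriate PMU event for the
running CPU, which is presumably why the tests drop the hardcoded raw
value.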