From ca8e055c22155b262e14409a555bab41a52edfea Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sat, 18 Sep 2021 11:52:17 +0200 Subject: scripts: get_abi.pl: add a graph to speedup the undefined algorithm Searching for symlinks is an expensive operation with the current logic, as it is at the order of O(n^3). In practice, running the check spends 2-3 minutes to check all symbols. Fix it by storing the directory tree into a graph, and using a Breadth First Search (BFS) to find the links for each sysfs node. With such improvement, it can now report issues with ~11 seconds on my machine. It comes with a price, though: there are more symbols reported as undefined after this change. I suspect it is due to some sysfs circular loops that are dropped by BFS. Despite such increase, it seems that the reports are now more coherent. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/f5c1e7b14a27132821c08f0459ba9aea3ed69028.1631957565.git.mchehab+huawei@kernel.org Signed-off-by: Greg Kroah-Hartman --- scripts/get_abi.pl | 188 ++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 127 insertions(+), 61 deletions(-) (limited to 'scripts/get_abi.pl') diff --git a/scripts/get_abi.pl b/scripts/get_abi.pl index aa0a751563ba..c52a1cf0f49d 100755 --- a/scripts/get_abi.pl +++ b/scripts/get_abi.pl @@ -546,6 +546,73 @@ sub dont_parse_special_attributes { my %leaf; my %aliases; my @files; +my %root; + +sub graph_add_file { + my $file = shift; + my $type = shift; + + my $dir = $file; + $dir =~ s,^(.*/).*,$1,; + $file =~ s,.*/,,; + + my $name; + my $file_ref = \%root; + foreach my $edge(split "/", $dir) { + $name .= "$edge/"; + if (!defined ${$file_ref}{$edge}) { + ${$file_ref}{$edge} = { }; + } + $file_ref = \%{$$file_ref{$edge}}; + ${$file_ref}{"__name"} = [ $name ]; + } + $name .= "$file"; + ${$file_ref}{$file} = { + "__name" => [ $name ] + }; + + return \%{$$file_ref{$file}}; +} + +sub graph_add_link { + my $file = shift; + my $link = shift; + + # Traverse graph to find the reference + my $file_ref = \%root; + foreach my $edge(split "/", $file) { + $file_ref = \%{$$file_ref{$edge}} || die "Missing node!"; + } + + # do a BFS + + my @queue; + my %seen; + my $base_name; + my $st; + + push @queue, $file_ref; + $seen{$start}++; + + while (@queue) { + my $v = shift @queue; + my @child = keys(%{$v}); + + foreach my $c(@child) { + next if $seen{$$v{$c}}; + next if ($c eq "__name"); + + # Add new name + my $name = @{$$v{$c}{"__name"}}[0]; + if ($name =~ s#^$file/#$link/#) { + push @{$$v{$c}{"__name"}}, $name; + } + # Add child to the queue and mark as seen + push @queue, $$v{$c}; + $seen{$c}++; + } + } +} my $escape_symbols = qr { ([\x01-\x08\x0e-\x1f\x21-\x29\x2b-\x2d\x3a-\x40\x7b-\xfe]) }x; sub parse_existing_sysfs { @@ -568,19 +635,50 @@ sub parse_existing_sysfs { return if (defined($data{$file})); return if (defined($data{$abs_file})); - push @files, $abs_file; + push @files, graph_add_file($abs_file, "file"); +} + +sub get_leave($) +{ + my $what = shift; + my $leave; + + my $l = $what; + my $stop = 1; + + $leave = $l; + $leave =~ s,/$,,; + $leave =~ s,.*/,,; + $leave =~ s/[\(\)]//g; + + # $leave is used to improve search performance at + # check_undefined_symbols, as the algorithm there can seek + # for a small number of "what". It also allows giving a + # hint about a leave with the same name somewhere else. + # However, there are a few occurences where the leave is + # either a wildcard or a number. Just group such cases + # altogether. + if ($leave =~ m/^\.\*/ || $leave eq "" || $leave =~ /^\d+$/) { + $leave = "others"; + } + + return $leave; } sub check_undefined_symbols { - foreach my $file (sort @files) { + foreach my $file_ref (sort @files) { + my @names = @{$$file_ref{"__name"}}; + my $file = $names[0]; my $defined = 0; my $exact = 0; - my $whats = ""; my $found_string; - my $leave = $file; - $leave =~ s,.*/,,; + my $leave = get_leave($file); + if (!defined($leaf{$leave})) { + $leave = "others"; + } + my $what = $leaf{$leave}; my $path = $file; $path =~ s,(.*/).*,$1,; @@ -590,41 +688,12 @@ sub check_undefined_symbols { $found_string = 1; } - if ($leave =~ /^\d+$/ || !defined($leaf{$leave})) { - $leave = "others"; - } - - print "--> $file\n" if ($found_string && $hint); - my $what = $leaf{$leave}; - $whats .= " $what" if (!($whats =~ m/$what/)); - - foreach my $w (split / /, $what) { - if ($file =~ m#^$w$#) { - $exact = 1; - last; - } - } - # Check for aliases - # - # TODO: this algorithm is O(w * n²). It can be - # improved in the future in order to handle it - # faster, by changing parse_existing_sysfs to - # store the sysfs inside a tree, at the expense - # on making the code less readable and/or using some - # additional perl library. - foreach my $a (keys %aliases) { - my $new = $aliases{$a}; - my $len = length($new); - - if (substr($file, 0, $len) eq $new) { - my $newf = $a . substr($file, $len); - - print " $newf\n" if ($found_string && $hint); - foreach my $w (split / /, $what) { - if ($newf =~ m#^$w$#) { - $exact = 1; - last; - } + foreach my $a (@names) { + print "--> $a\n" if ($found_string && $hint); + foreach my $w (split /\xac/, $what) { + if ($a =~ m#^$w$#) { + $exact = 1; + last; } } } @@ -641,8 +710,13 @@ sub check_undefined_symbols { # is not easily parseable. next if ($file =~ m#/parameters/#); - if ($hint && $defined && $leave ne "others") { - print "$leave at $path might be one of:$whats\n" if (!$search_string || $found_string); + if ($hint && $defined && (!$search_string || $found_string)) { + $what =~ s/\xac/\n\t/g; + if ($leave ne "others") { + print " more likely regexes:\n\t$what\n"; + } else { + print " tested regexes:\n\t$what\n"; + } next; } print "$file not found.\n" if (!$search_string || $found_string); @@ -656,8 +730,10 @@ sub undefined_symbols { no_chdir => 1 }, $sysfs_prefix); + $leaf{"others"} = ""; + foreach my $w (sort keys %data) { - foreach my $what (split /\xac /,$w) { + foreach my $what (split /\xac/,$w) { next if (!($what =~ m/^$sysfs_prefix/)); # Convert what into regular expressions @@ -700,19 +776,7 @@ sub undefined_symbols { # (this happens on a few IIO definitions) $what =~ s,\s*\=.*$,,; - my $leave = $what; - $leave =~ s,.*/,,; - - # $leave is used to improve search performance at - # check_undefined_symbols, as the algorithm there can seek - # for a small number of "what". It also allows giving a - # hint about a leave with the same name somewhere else. - # However, there are a few occurences where the leave is - # either a wildcard or a number. Just group such cases - # altogether. - if ($leave =~ m/^\.\*/ || $leave eq "" || $leave =~ /^\d+$/) { - $leave = "others" ; - } + my $leave = get_leave($what); # Escape all other symbols $what =~ s/$escape_symbols/\\$1/g; @@ -722,17 +786,14 @@ sub undefined_symbols { $what =~ s/\xff/\\d+/g; - # Special case: IIO ABI which a parenthesis. $what =~ s/sqrt(.*)/sqrt\(.*\)/; - $leave =~ s/[\(\)]//g; - my $added = 0; foreach my $l (split /\|/, $leave) { if (defined($leaf{$l})) { - next if ($leaf{$l} =~ m/$what/); - $leaf{$l} .= " " . $what; + next if ($leaf{$l} =~ m/\b$what\b/); + $leaf{$l} .= "\xac" . $what; $added = 1; } else { $leaf{$l} = $what; @@ -745,6 +806,11 @@ sub undefined_symbols { } } + # Take links into account + foreach my $link (keys %aliases) { + my $abs_file = $aliases{$link}; + graph_add_link($abs_file, $link); + } check_undefined_symbols; } -- cgit v1.2.3-59-g8ed1b