aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/tools/perf/scripts/python
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf/scripts/python')
-rw-r--r--tools/perf/scripts/python/Perf-Trace-Util/Build3
-rw-r--r--tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py7
-rwxr-xr-xtools/perf/scripts/python/arm-cs-trace-disasm.py30
-rw-r--r--tools/perf/scripts/python/bin/gecko-record2
-rwxr-xr-xtools/perf/scripts/python/bin/gecko-report7
-rw-r--r--tools/perf/scripts/python/compaction-times.py2
-rwxr-xr-xtools/perf/scripts/python/exported-sql-viewer.py4
-rw-r--r--tools/perf/scripts/python/gecko.py395
-rwxr-xr-xtools/perf/scripts/python/parallel-perf.py988
9 files changed, 1417 insertions, 21 deletions
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Build b/tools/perf/scripts/python/Perf-Trace-Util/Build
index 7d0e33ce6aba..5b0b5ff7e14a 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/Build
+++ b/tools/perf/scripts/python/Perf-Trace-Util/Build
@@ -1,3 +1,4 @@
perf-y += Context.o
-CFLAGS_Context.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs
+# -Wno-declaration-after-statement: The python headers have mixed code with declarations (decls after asserts, for instance)
+CFLAGS_Context.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs -Wno-declaration-after-statement
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
index 7384dcb628c4..b75d31858e54 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
+++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
@@ -54,6 +54,7 @@ try:
import audit
machine_to_id = {
'x86_64': audit.MACH_86_64,
+ 'aarch64': audit.MACH_AARCH64,
'alpha' : audit.MACH_ALPHA,
'ia64' : audit.MACH_IA64,
'ppc' : audit.MACH_PPC,
@@ -73,9 +74,9 @@ try:
except:
if not audit_package_warned:
audit_package_warned = True
- print("Install the audit-libs-python package to get syscall names.\n"
- "For example:\n # apt-get install python-audit (Ubuntu)"
- "\n # yum install audit-libs-python (Fedora)"
+ print("Install the python-audit package to get syscall names.\n"
+ "For example:\n # apt-get install python3-audit (Ubuntu)"
+ "\n # yum install python3-audit (Fedora)"
"\n etc.\n")
def syscall_name(id):
diff --git a/tools/perf/scripts/python/arm-cs-trace-disasm.py b/tools/perf/scripts/python/arm-cs-trace-disasm.py
index d59ff53f1d94..d973c2baed1c 100755
--- a/tools/perf/scripts/python/arm-cs-trace-disasm.py
+++ b/tools/perf/scripts/python/arm-cs-trace-disasm.py
@@ -45,8 +45,8 @@ parser = OptionParser(option_list=option_list)
# Initialize global dicts and regular expression
disasm_cache = dict()
cpu_data = dict()
-disasm_re = re.compile("^\s*([0-9a-fA-F]+):")
-disasm_func_re = re.compile("^\s*([0-9a-fA-F]+)\s.*:")
+disasm_re = re.compile(r"^\s*([0-9a-fA-F]+):")
+disasm_func_re = re.compile(r"^\s*([0-9a-fA-F]+)\s.*:")
cache_size = 64*1024
glb_source_file_name = None
@@ -188,6 +188,17 @@ def process_event(param_dict):
dso_end = get_optional(param_dict, "dso_map_end")
symbol = get_optional(param_dict, "symbol")
+ cpu = sample["cpu"]
+ ip = sample["ip"]
+ addr = sample["addr"]
+
+ # Initialize CPU data if it's empty, and directly return back
+ # if this is the first tracing event for this CPU.
+ if (cpu_data.get(str(cpu) + 'addr') == None):
+ cpu_data[str(cpu) + 'addr'] = addr
+ return
+
+
if (options.verbose == True):
print("Event type: %s" % name)
print_sample(sample)
@@ -209,16 +220,6 @@ def process_event(param_dict):
if (name[0:8] != "branches"):
return
- cpu = sample["cpu"]
- ip = sample["ip"]
- addr = sample["addr"]
-
- # Initialize CPU data if it's empty, and directly return back
- # if this is the first tracing event for this CPU.
- if (cpu_data.get(str(cpu) + 'addr') == None):
- cpu_data[str(cpu) + 'addr'] = addr
- return
-
# The format for packet is:
#
# +------------+------------+------------+
@@ -258,8 +259,9 @@ def process_event(param_dict):
if (options.objdump_name != None):
# It doesn't need to decrease virtual memory offset for disassembly
- # for kernel dso, so in this case we set vm_start to zero.
- if (dso == "[kernel.kallsyms]"):
+ # for kernel dso and executable file dso, so in this case we set
+ # vm_start to zero.
+ if (dso == "[kernel.kallsyms]" or dso_start == 0x400000):
dso_vm_start = 0
else:
dso_vm_start = int(dso_start)
diff --git a/tools/perf/scripts/python/bin/gecko-record b/tools/perf/scripts/python/bin/gecko-record
new file mode 100644
index 000000000000..f0d1aa55f171
--- /dev/null
+++ b/tools/perf/scripts/python/bin/gecko-record
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf record -F 99 -g "$@"
diff --git a/tools/perf/scripts/python/bin/gecko-report b/tools/perf/scripts/python/bin/gecko-report
new file mode 100755
index 000000000000..1867ec8d9757
--- /dev/null
+++ b/tools/perf/scripts/python/bin/gecko-report
@@ -0,0 +1,7 @@
+#!/bin/bash
+# description: create firefox gecko profile json format from perf.data
+if [ "$*" = "-i -" ]; then
+perf script -s "$PERF_EXEC_PATH"/scripts/python/gecko.py
+else
+perf script -s "$PERF_EXEC_PATH"/scripts/python/gecko.py -- "$@"
+fi
diff --git a/tools/perf/scripts/python/compaction-times.py b/tools/perf/scripts/python/compaction-times.py
index 2560a042dc6f..9401f7c14747 100644
--- a/tools/perf/scripts/python/compaction-times.py
+++ b/tools/perf/scripts/python/compaction-times.py
@@ -260,7 +260,7 @@ def pr_help():
comm_re = None
pid_re = None
-pid_regex = "^(\d*)-(\d*)$|^(\d*)$"
+pid_regex = r"^(\d*)-(\d*)$|^(\d*)$"
opt_proc = popt.DISP_DFL
opt_disp = topt.DISP_ALL
diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py
index 13f2d8a81610..121cf61ba1b3 100755
--- a/tools/perf/scripts/python/exported-sql-viewer.py
+++ b/tools/perf/scripts/python/exported-sql-viewer.py
@@ -677,8 +677,8 @@ class CallGraphModelBase(TreeModel):
# sqlite supports GLOB (text only) which uses * and ? and is case sensitive
if not self.glb.dbref.is_sqlite3:
# Escape % and _
- s = value.replace("%", "\%")
- s = s.replace("_", "\_")
+ s = value.replace("%", "\\%")
+ s = s.replace("_", "\\_")
# Translate * and ? into SQL LIKE pattern characters % and _
trans = string.maketrans("*?", "%_")
match = " LIKE '" + str(s).translate(trans) + "'"
diff --git a/tools/perf/scripts/python/gecko.py b/tools/perf/scripts/python/gecko.py
new file mode 100644
index 000000000000..bc5a72f94bfa
--- /dev/null
+++ b/tools/perf/scripts/python/gecko.py
@@ -0,0 +1,395 @@
+# gecko.py - Convert perf record output to Firefox's gecko profile format
+# SPDX-License-Identifier: GPL-2.0
+#
+# The script converts perf.data to Gecko Profile Format,
+# which can be read by https://profiler.firefox.com/.
+#
+# Usage:
+#
+# perf record -a -g -F 99 sleep 60
+# perf script report gecko
+#
+# Combined:
+#
+# perf script gecko -F 99 -a sleep 60
+
+import os
+import sys
+import time
+import json
+import string
+import random
+import argparse
+import threading
+import webbrowser
+import urllib.parse
+from os import system
+from functools import reduce
+from dataclasses import dataclass, field
+from http.server import HTTPServer, SimpleHTTPRequestHandler, test
+from typing import List, Dict, Optional, NamedTuple, Set, Tuple, Any
+
+# Add the Perf-Trace-Util library to the Python path
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+ '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+
+StringID = int
+StackID = int
+FrameID = int
+CategoryID = int
+Milliseconds = float
+
+# start_time is intialiazed only once for the all event traces.
+start_time = None
+
+# https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/profile.js#L425
+# Follow Brendan Gregg's Flamegraph convention: orange for kernel and yellow for user space by default.
+CATEGORIES = None
+
+# The product name is used by the profiler UI to show the Operating system and Processor.
+PRODUCT = os.popen('uname -op').read().strip()
+
+# store the output file
+output_file = None
+
+# Here key = tid, value = Thread
+tid_to_thread = dict()
+
+# The HTTP server is used to serve the profile to the profiler UI.
+http_server_thread = None
+
+# The category index is used by the profiler UI to show the color of the flame graph.
+USER_CATEGORY_INDEX = 0
+KERNEL_CATEGORY_INDEX = 1
+
+# https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L156
+class Frame(NamedTuple):
+ string_id: StringID
+ relevantForJS: bool
+ innerWindowID: int
+ implementation: None
+ optimizations: None
+ line: None
+ column: None
+ category: CategoryID
+ subcategory: int
+
+# https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L216
+class Stack(NamedTuple):
+ prefix_id: Optional[StackID]
+ frame_id: FrameID
+
+# https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L90
+class Sample(NamedTuple):
+ stack_id: Optional[StackID]
+ time_ms: Milliseconds
+ responsiveness: int
+
+@dataclass
+class Thread:
+ """A builder for a profile of the thread.
+
+ Attributes:
+ comm: Thread command-line (name).
+ pid: process ID of containing process.
+ tid: thread ID.
+ samples: Timeline of profile samples.
+ frameTable: interned stack frame ID -> stack frame.
+ stringTable: interned string ID -> string.
+ stringMap: interned string -> string ID.
+ stackTable: interned stack ID -> stack.
+ stackMap: (stack prefix ID, leaf stack frame ID) -> interned Stack ID.
+ frameMap: Stack Frame string -> interned Frame ID.
+ comm: str
+ pid: int
+ tid: int
+ samples: List[Sample] = field(default_factory=list)
+ frameTable: List[Frame] = field(default_factory=list)
+ stringTable: List[str] = field(default_factory=list)
+ stringMap: Dict[str, int] = field(default_factory=dict)
+ stackTable: List[Stack] = field(default_factory=list)
+ stackMap: Dict[Tuple[Optional[int], int], int] = field(default_factory=dict)
+ frameMap: Dict[str, int] = field(default_factory=dict)
+ """
+ comm: str
+ pid: int
+ tid: int
+ samples: List[Sample] = field(default_factory=list)
+ frameTable: List[Frame] = field(default_factory=list)
+ stringTable: List[str] = field(default_factory=list)
+ stringMap: Dict[str, int] = field(default_factory=dict)
+ stackTable: List[Stack] = field(default_factory=list)
+ stackMap: Dict[Tuple[Optional[int], int], int] = field(default_factory=dict)
+ frameMap: Dict[str, int] = field(default_factory=dict)
+
+ def _intern_stack(self, frame_id: int, prefix_id: Optional[int]) -> int:
+ """Gets a matching stack, or saves the new stack. Returns a Stack ID."""
+ key = f"{frame_id}" if prefix_id is None else f"{frame_id},{prefix_id}"
+ # key = (prefix_id, frame_id)
+ stack_id = self.stackMap.get(key)
+ if stack_id is None:
+ # return stack_id
+ stack_id = len(self.stackTable)
+ self.stackTable.append(Stack(prefix_id=prefix_id, frame_id=frame_id))
+ self.stackMap[key] = stack_id
+ return stack_id
+
+ def _intern_string(self, string: str) -> int:
+ """Gets a matching string, or saves the new string. Returns a String ID."""
+ string_id = self.stringMap.get(string)
+ if string_id is not None:
+ return string_id
+ string_id = len(self.stringTable)
+ self.stringTable.append(string)
+ self.stringMap[string] = string_id
+ return string_id
+
+ def _intern_frame(self, frame_str: str) -> int:
+ """Gets a matching stack frame, or saves the new frame. Returns a Frame ID."""
+ frame_id = self.frameMap.get(frame_str)
+ if frame_id is not None:
+ return frame_id
+ frame_id = len(self.frameTable)
+ self.frameMap[frame_str] = frame_id
+ string_id = self._intern_string(frame_str)
+
+ symbol_name_to_category = KERNEL_CATEGORY_INDEX if frame_str.find('kallsyms') != -1 \
+ or frame_str.find('/vmlinux') != -1 \
+ or frame_str.endswith('.ko)') \
+ else USER_CATEGORY_INDEX
+
+ self.frameTable.append(Frame(
+ string_id=string_id,
+ relevantForJS=False,
+ innerWindowID=0,
+ implementation=None,
+ optimizations=None,
+ line=None,
+ column=None,
+ category=symbol_name_to_category,
+ subcategory=None,
+ ))
+ return frame_id
+
+ def _add_sample(self, comm: str, stack: List[str], time_ms: Milliseconds) -> None:
+ """Add a timestamped stack trace sample to the thread builder.
+ Args:
+ comm: command-line (name) of the thread at this sample
+ stack: sampled stack frames. Root first, leaf last.
+ time_ms: timestamp of sample in milliseconds.
+ """
+ # Ihreads may not set their names right after they are created.
+ # Instead, they might do it later. In such situations, to use the latest name they have set.
+ if self.comm != comm:
+ self.comm = comm
+
+ prefix_stack_id = reduce(lambda prefix_id, frame: self._intern_stack
+ (self._intern_frame(frame), prefix_id), stack, None)
+ if prefix_stack_id is not None:
+ self.samples.append(Sample(stack_id=prefix_stack_id,
+ time_ms=time_ms,
+ responsiveness=0))
+
+ def _to_json_dict(self) -> Dict:
+ """Converts current Thread to GeckoThread JSON format."""
+ # Gecko profile format is row-oriented data as List[List],
+ # And a schema for interpreting each index.
+ # Schema:
+ # https://github.com/firefox-devtools/profiler/blob/main/docs-developer/gecko-profile-format.md
+ # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L230
+ return {
+ "tid": self.tid,
+ "pid": self.pid,
+ "name": self.comm,
+ # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L51
+ "markers": {
+ "schema": {
+ "name": 0,
+ "startTime": 1,
+ "endTime": 2,
+ "phase": 3,
+ "category": 4,
+ "data": 5,
+ },
+ "data": [],
+ },
+
+ # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L90
+ "samples": {
+ "schema": {
+ "stack": 0,
+ "time": 1,
+ "responsiveness": 2,
+ },
+ "data": self.samples
+ },
+
+ # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L156
+ "frameTable": {
+ "schema": {
+ "location": 0,
+ "relevantForJS": 1,
+ "innerWindowID": 2,
+ "implementation": 3,
+ "optimizations": 4,
+ "line": 5,
+ "column": 6,
+ "category": 7,
+ "subcategory": 8,
+ },
+ "data": self.frameTable,
+ },
+
+ # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L216
+ "stackTable": {
+ "schema": {
+ "prefix": 0,
+ "frame": 1,
+ },
+ "data": self.stackTable,
+ },
+ "stringTable": self.stringTable,
+ "registerTime": 0,
+ "unregisterTime": None,
+ "processType": "default",
+ }
+
+# Uses perf script python interface to parse each
+# event and store the data in the thread builder.
+def process_event(param_dict: Dict) -> None:
+ global start_time
+ global tid_to_thread
+ time_stamp = (param_dict['sample']['time'] // 1000) / 1000
+ pid = param_dict['sample']['pid']
+ tid = param_dict['sample']['tid']
+ comm = param_dict['comm']
+
+ # Start time is the time of the first sample
+ if not start_time:
+ start_time = time_stamp
+
+ # Parse and append the callchain of the current sample into a stack.
+ stack = []
+ if param_dict['callchain']:
+ for call in param_dict['callchain']:
+ if 'sym' not in call:
+ continue
+ stack.append(f'{call["sym"]["name"]} (in {call["dso"]})')
+ if len(stack) != 0:
+ # Reverse the stack, as root come first and the leaf at the end.
+ stack = stack[::-1]
+
+ # During perf record if -g is not used, the callchain is not available.
+ # In that case, the symbol and dso are available in the event parameters.
+ else:
+ func = param_dict['symbol'] if 'symbol' in param_dict else '[unknown]'
+ dso = param_dict['dso'] if 'dso' in param_dict else '[unknown]'
+ stack.append(f'{func} (in {dso})')
+
+ # Add sample to the specific thread.
+ thread = tid_to_thread.get(tid)
+ if thread is None:
+ thread = Thread(comm=comm, pid=pid, tid=tid)
+ tid_to_thread[tid] = thread
+ thread._add_sample(comm=comm, stack=stack, time_ms=time_stamp)
+
+def trace_begin() -> None:
+ global output_file
+ if (output_file is None):
+ print("Staring Firefox Profiler on your default browser...")
+ global http_server_thread
+ http_server_thread = threading.Thread(target=test, args=(CORSRequestHandler, HTTPServer,))
+ http_server_thread.daemon = True
+ http_server_thread.start()
+
+# Trace_end runs at the end and will be used to aggregate
+# the data into the final json object and print it out to stdout.
+def trace_end() -> None:
+ global output_file
+ threads = [thread._to_json_dict() for thread in tid_to_thread.values()]
+
+ # Schema: https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L305
+ gecko_profile_with_meta = {
+ "meta": {
+ "interval": 1,
+ "processType": 0,
+ "product": PRODUCT,
+ "stackwalk": 1,
+ "debug": 0,
+ "gcpoison": 0,
+ "asyncstack": 1,
+ "startTime": start_time,
+ "shutdownTime": None,
+ "version": 24,
+ "presymbolicated": True,
+ "categories": CATEGORIES,
+ "markerSchema": [],
+ },
+ "libs": [],
+ "threads": threads,
+ "processes": [],
+ "pausedRanges": [],
+ }
+ # launch the profiler on local host if not specified --save-only args, otherwise print to file
+ if (output_file is None):
+ output_file = 'gecko_profile.json'
+ with open(output_file, 'w') as f:
+ json.dump(gecko_profile_with_meta, f, indent=2)
+ launchFirefox(output_file)
+ time.sleep(1)
+ print(f'[ perf gecko: Captured and wrote into {output_file} ]')
+ else:
+ print(f'[ perf gecko: Captured and wrote into {output_file} ]')
+ with open(output_file, 'w') as f:
+ json.dump(gecko_profile_with_meta, f, indent=2)
+
+# Used to enable Cross-Origin Resource Sharing (CORS) for requests coming from 'https://profiler.firefox.com', allowing it to access resources from this server.
+class CORSRequestHandler(SimpleHTTPRequestHandler):
+ def end_headers (self):
+ self.send_header('Access-Control-Allow-Origin', 'https://profiler.firefox.com')
+ SimpleHTTPRequestHandler.end_headers(self)
+
+# start a local server to serve the gecko_profile.json file to the profiler.firefox.com
+def launchFirefox(file):
+ safe_string = urllib.parse.quote_plus(f'http://localhost:8000/{file}')
+ url = 'https://profiler.firefox.com/from-url/' + safe_string
+ webbrowser.open(f'{url}')
+
+def main() -> None:
+ global output_file
+ global CATEGORIES
+ parser = argparse.ArgumentParser(description="Convert perf.data to Firefox\'s Gecko Profile format which can be uploaded to profiler.firefox.com for visualization")
+
+ # Add the command-line options
+ # Colors must be defined according to this:
+ # https://github.com/firefox-devtools/profiler/blob/50124adbfa488adba6e2674a8f2618cf34b59cd2/res/css/categories.css
+ parser.add_argument('--user-color', default='yellow', help='Color for the User category', choices=['yellow', 'blue', 'purple', 'green', 'orange', 'red', 'grey', 'magenta'])
+ parser.add_argument('--kernel-color', default='orange', help='Color for the Kernel category', choices=['yellow', 'blue', 'purple', 'green', 'orange', 'red', 'grey', 'magenta'])
+ # If --save-only is specified, the output will be saved to a file instead of opening Firefox's profiler directly.
+ parser.add_argument('--save-only', help='Save the output to a file instead of opening Firefox\'s profiler')
+
+ # Parse the command-line arguments
+ args = parser.parse_args()
+ # Access the values provided by the user
+ user_color = args.user_color
+ kernel_color = args.kernel_color
+ output_file = args.save_only
+
+ CATEGORIES = [
+ {
+ "name": 'User',
+ "color": user_color,
+ "subcategories": ['Other']
+ },
+ {
+ "name": 'Kernel',
+ "color": kernel_color,
+ "subcategories": ['Other']
+ },
+ ]
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/perf/scripts/python/parallel-perf.py b/tools/perf/scripts/python/parallel-perf.py
new file mode 100755
index 000000000000..21f32ec5ed46
--- /dev/null
+++ b/tools/perf/scripts/python/parallel-perf.py
@@ -0,0 +1,988 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a perf script command multiple times in parallel, using perf script
+# options --cpu and --time so that each job processes a different chunk
+# of the data.
+#
+# Copyright (c) 2024, Intel Corporation.
+
+import subprocess
+import argparse
+import pathlib
+import shlex
+import time
+import copy
+import sys
+import os
+import re
+
+glb_prog_name = "parallel-perf.py"
+glb_min_interval = 10.0
+glb_min_samples = 64
+
+class Verbosity():
+
+ def __init__(self, quiet=False, verbose=False, debug=False):
+ self.normal = True
+ self.verbose = verbose
+ self.debug = debug
+ self.self_test = True
+ if self.debug:
+ self.verbose = True
+ if self.verbose:
+ quiet = False
+ if quiet:
+ self.normal = False
+
+# Manage work (Start/Wait/Kill), as represented by a subprocess.Popen command
+class Work():
+
+ def __init__(self, cmd, pipe_to, output_dir="."):
+ self.popen = None
+ self.consumer = None
+ self.cmd = cmd
+ self.pipe_to = pipe_to
+ self.output_dir = output_dir
+ self.cmdout_name = f"{output_dir}/cmd.txt"
+ self.stdout_name = f"{output_dir}/out.txt"
+ self.stderr_name = f"{output_dir}/err.txt"
+
+ def Command(self):
+ sh_cmd = [ shlex.quote(x) for x in self.cmd ]
+ return " ".join(self.cmd)
+
+ def Stdout(self):
+ return open(self.stdout_name, "w")
+
+ def Stderr(self):
+ return open(self.stderr_name, "w")
+
+ def CreateOutputDir(self):
+ pathlib.Path(self.output_dir).mkdir(parents=True, exist_ok=True)
+
+ def Start(self):
+ if self.popen:
+ return
+ self.CreateOutputDir()
+ with open(self.cmdout_name, "w") as f:
+ f.write(self.Command())
+ f.write("\n")
+ stdout = self.Stdout()
+ stderr = self.Stderr()
+ if self.pipe_to:
+ self.popen = subprocess.Popen(self.cmd, stdout=subprocess.PIPE, stderr=stderr)
+ args = shlex.split(self.pipe_to)
+ self.consumer = subprocess.Popen(args, stdin=self.popen.stdout, stdout=stdout, stderr=stderr)
+ else:
+ self.popen = subprocess.Popen(self.cmd, stdout=stdout, stderr=stderr)
+
+ def RemoveEmptyErrFile(self):
+ if os.path.exists(self.stderr_name):
+ if os.path.getsize(self.stderr_name) == 0:
+ os.unlink(self.stderr_name)
+
+ def Errors(self):
+ if os.path.exists(self.stderr_name):
+ if os.path.getsize(self.stderr_name) != 0:
+ return [ f"Non-empty error file {self.stderr_name}" ]
+ return []
+
+ def TidyUp(self):
+ self.RemoveEmptyErrFile()
+
+ def RawPollWait(self, p, wait):
+ if wait:
+ return p.wait()
+ return p.poll()
+
+ def Poll(self, wait=False):
+ if not self.popen:
+ return None
+ result = self.RawPollWait(self.popen, wait)
+ if self.consumer:
+ res = result
+ result = self.RawPollWait(self.consumer, wait)
+ if result != None and res == None:
+ self.popen.kill()
+ result = None
+ elif result == 0 and res != None and res != 0:
+ result = res
+ if result != None:
+ self.TidyUp()
+ return result
+
+ def Wait(self):
+ return self.Poll(wait=True)
+
+ def Kill(self):
+ if not self.popen:
+ return
+ self.popen.kill()
+ if self.consumer:
+ self.consumer.kill()
+
+def KillWork(worklist, verbosity):
+ for w in worklist:
+ w.Kill()
+ for w in worklist:
+ w.Wait()
+
+def NumberOfCPUs():
+ return os.sysconf("SC_NPROCESSORS_ONLN")
+
+def NanoSecsToSecsStr(x):
+ if x == None:
+ return ""
+ x = str(x)
+ if len(x) < 10:
+ x = "0" * (10 - len(x)) + x
+ return x[:len(x) - 9] + "." + x[-9:]
+
+def InsertOptionAfter(cmd, option, after):
+ try:
+ pos = cmd.index(after)
+ cmd.insert(pos + 1, option)
+ except:
+ cmd.append(option)
+
+def CreateWorkList(cmd, pipe_to, output_dir, cpus, time_ranges_by_cpu):
+ max_len = len(str(cpus[-1]))
+ cpu_dir_fmt = f"cpu-%.{max_len}u"
+ worklist = []
+ pos = 0
+ for cpu in cpus:
+ if cpu >= 0:
+ cpu_dir = os.path.join(output_dir, cpu_dir_fmt % cpu)
+ cpu_option = f"--cpu={cpu}"
+ else:
+ cpu_dir = output_dir
+ cpu_option = None
+
+ tr_dir_fmt = "time-range"
+
+ if len(time_ranges_by_cpu) > 1:
+ time_ranges = time_ranges_by_cpu[pos]
+ tr_dir_fmt += f"-{pos}"
+ pos += 1
+ else:
+ time_ranges = time_ranges_by_cpu[0]
+
+ max_len = len(str(len(time_ranges)))
+ tr_dir_fmt += f"-%.{max_len}u"
+
+ i = 0
+ for r in time_ranges:
+ if r == [None, None]:
+ time_option = None
+ work_output_dir = cpu_dir
+ else:
+ time_option = "--time=" + NanoSecsToSecsStr(r[0]) + "," + NanoSecsToSecsStr(r[1])
+ work_output_dir = os.path.join(cpu_dir, tr_dir_fmt % i)
+ i += 1
+ work_cmd = list(cmd)
+ if time_option != None:
+ InsertOptionAfter(work_cmd, time_option, "script")
+ if cpu_option != None:
+ InsertOptionAfter(work_cmd, cpu_option, "script")
+ w = Work(work_cmd, pipe_to, work_output_dir)
+ worklist.append(w)
+ return worklist
+
+def DoRunWork(worklist, nr_jobs, verbosity):
+ nr_to_do = len(worklist)
+ not_started = list(worklist)
+ running = []
+ done = []
+ chg = False
+ while True:
+ nr_done = len(done)
+ if chg and verbosity.normal:
+ nr_run = len(running)
+ print(f"\rThere are {nr_to_do} jobs: {nr_done} completed, {nr_run} running", flush=True, end=" ")
+ if verbosity.verbose:
+ print()
+ chg = False
+ if nr_done == nr_to_do:
+ break
+ while len(running) < nr_jobs and len(not_started):
+ w = not_started.pop(0)
+ running.append(w)
+ if verbosity.verbose:
+ print("Starting:", w.Command())
+ w.Start()
+ chg = True
+ if len(running):
+ time.sleep(0.1)
+ finished = []
+ not_finished = []
+ while len(running):
+ w = running.pop(0)
+ r = w.Poll()
+ if r == None:
+ not_finished.append(w)
+ continue
+ if r == 0:
+ if verbosity.verbose:
+ print("Finished:", w.Command())
+ finished.append(w)
+ chg = True
+ continue
+ if verbosity.normal and not verbosity.verbose:
+ print()
+ print("Job failed!\n return code:", r, "\n command: ", w.Command())
+ if w.pipe_to:
+ print(" piped to: ", w.pipe_to)
+ print("Killing outstanding jobs")
+ KillWork(not_finished, verbosity)
+ KillWork(running, verbosity)
+ return False
+ running = not_finished
+ done += finished
+ errorlist = []
+ for w in worklist:
+ errorlist += w.Errors()
+ if len(errorlist):
+ print("Errors:")
+ for e in errorlist:
+ print(e)
+ elif verbosity.normal:
+ print("\r"," "*50, "\rAll jobs finished successfully", flush=True)
+ return True
+
+def RunWork(worklist, nr_jobs=NumberOfCPUs(), verbosity=Verbosity()):
+ try:
+ return DoRunWork(worklist, nr_jobs, verbosity)
+ except:
+ for w in worklist:
+ w.Kill()
+ raise
+ return True
+
+def ReadHeader(perf, file_name):
+ return subprocess.Popen([perf, "script", "--header-only", "--input", file_name], stdout=subprocess.PIPE).stdout.read().decode("utf-8")
+
+def ParseHeader(hdr):
+ result = {}
+ lines = hdr.split("\n")
+ for line in lines:
+ if ":" in line and line[0] == "#":
+ pos = line.index(":")
+ name = line[1:pos-1].strip()
+ value = line[pos+1:].strip()
+ if name in result:
+ orig_name = name
+ nr = 2
+ while True:
+ name = f"{orig_name} {nr}"
+ if name not in result:
+ break
+ nr += 1
+ result[name] = value
+ return result
+
+def HeaderField(hdr_dict, hdr_fld):
+ if hdr_fld not in hdr_dict:
+ raise Exception(f"'{hdr_fld}' missing from header information")
+ return hdr_dict[hdr_fld]
+
+# Represent the position of an option within a command string
+# and provide the option value and/or remove the option
+class OptPos():
+
+ def Init(self, opt_element=-1, value_element=-1, opt_pos=-1, value_pos=-1, error=None):
+ self.opt_element = opt_element # list element that contains option
+ self.value_element = value_element # list element that contains option value
+ self.opt_pos = opt_pos # string position of option
+ self.value_pos = value_pos # string position of value
+ self.error = error # error message string
+
+ def __init__(self, args, short_name, long_name, default=None):
+ self.args = list(args)
+ self.default = default
+ n = 2 + len(long_name)
+ m = len(short_name)
+ pos = -1
+ for opt in args:
+ pos += 1
+ if m and opt[:2] == f"-{short_name}":
+ if len(opt) == 2:
+ if pos + 1 < len(args):
+ self.Init(pos, pos + 1, 0, 0)
+ else:
+ self.Init(error = f"-{short_name} option missing value")
+ else:
+ self.Init(pos, pos, 0, 2)
+ return
+ if opt[:n] == f"--{long_name}":
+ if len(opt) == n:
+ if pos + 1 < len(args):
+ self.Init(pos, pos + 1, 0, 0)
+ else:
+ self.Init(error = f"--{long_name} option missing value")
+ elif opt[n] == "=":
+ self.Init(pos, pos, 0, n + 1)
+ else:
+ self.Init(error = f"--{long_name} option expected '='")
+ return
+ if m and opt[:1] == "-" and opt[:2] != "--" and short_name in opt:
+ ipos = opt.index(short_name)
+ if "-" in opt[1:]:
+ hpos = opt[1:].index("-")
+ if hpos < ipos:
+ continue
+ if ipos + 1 == len(opt):
+ if pos + 1 < len(args):
+ self.Init(pos, pos + 1, ipos, 0)
+ else:
+ self.Init(error = f"-{short_name} option missing value")
+ else:
+ self.Init(pos, pos, ipos, ipos + 1)
+ return
+ self.Init()
+
+ def Value(self):
+ if self.opt_element >= 0:
+ if self.opt_element != self.value_element:
+ return self.args[self.value_element]
+ else:
+ return self.args[self.value_element][self.value_pos:]
+ return self.default
+
+ def Remove(self, args):
+ if self.opt_element == -1:
+ return
+ if self.opt_element != self.value_element:
+ del args[self.value_element]
+ if self.opt_pos:
+ args[self.opt_element] = args[self.opt_element][:self.opt_pos]
+ else:
+ del args[self.opt_element]
+
+def DetermineInputFileName(cmd):
+ p = OptPos(cmd, "i", "input", "perf.data")
+ if p.error:
+ raise Exception(f"perf command {p.error}")
+ file_name = p.Value()
+ if not os.path.exists(file_name):
+ raise Exception(f"perf command input file '{file_name}' not found")
+ return file_name
+
+def ReadOption(args, short_name, long_name, err_prefix, remove=False):
+ p = OptPos(args, short_name, long_name)
+ if p.error:
+ raise Exception(f"{err_prefix}{p.error}")
+ value = p.Value()
+ if remove:
+ p.Remove(args)
+ return value
+
+def ExtractOption(args, short_name, long_name, err_prefix):
+ return ReadOption(args, short_name, long_name, err_prefix, True)
+
+def ReadPerfOption(args, short_name, long_name):
+ return ReadOption(args, short_name, long_name, "perf command ")
+
+def ExtractPerfOption(args, short_name, long_name):
+ return ExtractOption(args, short_name, long_name, "perf command ")
+
+def PerfDoubleQuickCommands(cmd, file_name):
+ cpu_str = ReadPerfOption(cmd, "C", "cpu")
+ time_str = ReadPerfOption(cmd, "", "time")
+ # Use double-quick sampling to determine trace data density
+ times_cmd = ["perf", "script", "--ns", "--input", file_name, "--itrace=qqi"]
+ if cpu_str != None and cpu_str != "":
+ times_cmd.append(f"--cpu={cpu_str}")
+ if time_str != None and time_str != "":
+ times_cmd.append(f"--time={time_str}")
+ cnts_cmd = list(times_cmd)
+ cnts_cmd.append("-Fcpu")
+ times_cmd.append("-Fcpu,time")
+ return cnts_cmd, times_cmd
+
+class CPUTimeRange():
+ def __init__(self, cpu):
+ self.cpu = cpu
+ self.sample_cnt = 0
+ self.time_ranges = None
+ self.interval = 0
+ self.interval_remaining = 0
+ self.remaining = 0
+ self.tr_pos = 0
+
+def CalcTimeRangesByCPU(line, cpu, cpu_time_ranges, max_time):
+ cpu_time_range = cpu_time_ranges[cpu]
+ cpu_time_range.remaining -= 1
+ cpu_time_range.interval_remaining -= 1
+ if cpu_time_range.remaining == 0:
+ cpu_time_range.time_ranges[cpu_time_range.tr_pos][1] = max_time
+ return
+ if cpu_time_range.interval_remaining == 0:
+ time = TimeVal(line[1][:-1], 0)
+ time_ranges = cpu_time_range.time_ranges
+ time_ranges[cpu_time_range.tr_pos][1] = time - 1
+ time_ranges.append([time, max_time])
+ cpu_time_range.tr_pos += 1
+ cpu_time_range.interval_remaining = cpu_time_range.interval
+
+def CountSamplesByCPU(line, cpu, cpu_time_ranges):
+ try:
+ cpu_time_ranges[cpu].sample_cnt += 1
+ except:
+ print("exception")
+ print("cpu", cpu)
+ print("len(cpu_time_ranges)", len(cpu_time_ranges))
+ raise
+
+def ProcessCommandOutputLines(cmd, per_cpu, fn, *x):
+ # Assume CPU number is at beginning of line and enclosed by []
+ pat = re.compile(r"\s*\[[0-9]+\]")
+ p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
+ while True:
+ if line := p.stdout.readline():
+ line = line.decode("utf-8")
+ if pat.match(line):
+ line = line.split()
+ if per_cpu:
+ # Assumes CPU number is enclosed by []
+ cpu = int(line[0][1:-1])
+ else:
+ cpu = 0
+ fn(line, cpu, *x)
+ else:
+ break
+ p.wait()
+
+def IntersectTimeRanges(new_time_ranges, time_ranges):
+ pos = 0
+ new_pos = 0
+ # Can assume len(time_ranges) != 0 and len(new_time_ranges) != 0
+ # Note also, there *must* be at least one intersection.
+ while pos < len(time_ranges) and new_pos < len(new_time_ranges):
+ # new end < old start => no intersection, remove new
+ if new_time_ranges[new_pos][1] < time_ranges[pos][0]:
+ del new_time_ranges[new_pos]
+ continue
+ # new start > old end => no intersection, check next
+ if new_time_ranges[new_pos][0] > time_ranges[pos][1]:
+ pos += 1
+ if pos < len(time_ranges):
+ continue
+ # no next, so remove remaining
+ while new_pos < len(new_time_ranges):
+ del new_time_ranges[new_pos]
+ return
+ # Found an intersection
+ # new start < old start => adjust new start = old start
+ if new_time_ranges[new_pos][0] < time_ranges[pos][0]:
+ new_time_ranges[new_pos][0] = time_ranges[pos][0]
+ # new end > old end => keep the overlap, insert the remainder
+ if new_time_ranges[new_pos][1] > time_ranges[pos][1]:
+ r = [ time_ranges[pos][1] + 1, new_time_ranges[new_pos][1] ]
+ new_time_ranges[new_pos][1] = time_ranges[pos][1]
+ new_pos += 1
+ new_time_ranges.insert(new_pos, r)
+ continue
+ # new [start, end] is within old [start, end]
+ new_pos += 1
+
+def SplitTimeRangesByTraceDataDensity(time_ranges, cpus, nr, cmd, file_name, per_cpu, min_size, min_interval, verbosity):
+ if verbosity.normal:
+ print("\rAnalyzing...", flush=True, end=" ")
+ if verbosity.verbose:
+ print()
+ cnts_cmd, times_cmd = PerfDoubleQuickCommands(cmd, file_name)
+
+ nr_cpus = cpus[-1] + 1 if per_cpu else 1
+ if per_cpu:
+ nr_cpus = cpus[-1] + 1
+ cpu_time_ranges = [ CPUTimeRange(cpu) for cpu in range(nr_cpus) ]
+ else:
+ nr_cpus = 1
+ cpu_time_ranges = [ CPUTimeRange(-1) ]
+
+ if verbosity.debug:
+ print("nr_cpus", nr_cpus)
+ print("cnts_cmd", cnts_cmd)
+ print("times_cmd", times_cmd)
+
+ # Count the number of "double quick" samples per CPU
+ ProcessCommandOutputLines(cnts_cmd, per_cpu, CountSamplesByCPU, cpu_time_ranges)
+
+ tot = 0
+ mx = 0
+ for cpu_time_range in cpu_time_ranges:
+ cnt = cpu_time_range.sample_cnt
+ tot += cnt
+ if cnt > mx:
+ mx = cnt
+ if verbosity.debug:
+ print("cpu:", cpu_time_range.cpu, "sample_cnt", cnt)
+
+ if min_size < 1:
+ min_size = 1
+
+ if mx < min_size:
+ # Too little data to be worth splitting
+ if verbosity.debug:
+ print("Too little data to split by time")
+ if nr == 0:
+ nr = 1
+ return [ SplitTimeRangesIntoN(time_ranges, nr, min_interval) ]
+
+ if nr:
+ divisor = nr
+ min_size = 1
+ else:
+ divisor = NumberOfCPUs()
+
+ interval = int(round(tot / divisor, 0))
+ if interval < min_size:
+ interval = min_size
+
+ if verbosity.debug:
+ print("divisor", divisor)
+ print("min_size", min_size)
+ print("interval", interval)
+
+ min_time = time_ranges[0][0]
+ max_time = time_ranges[-1][1]
+
+ for cpu_time_range in cpu_time_ranges:
+ cnt = cpu_time_range.sample_cnt
+ if cnt == 0:
+ cpu_time_range.time_ranges = copy.deepcopy(time_ranges)
+ continue
+ # Adjust target interval for CPU to give approximately equal interval sizes
+ # Determine number of intervals, rounding to nearest integer
+ n = int(round(cnt / interval, 0))
+ if n < 1:
+ n = 1
+ # Determine interval size, rounding up
+ d, m = divmod(cnt, n)
+ if m:
+ d += 1
+ cpu_time_range.interval = d
+ cpu_time_range.interval_remaining = d
+ cpu_time_range.remaining = cnt
+ # Init. time ranges for each CPU with the start time
+ cpu_time_range.time_ranges = [ [min_time, max_time] ]
+
+ # Set time ranges so that the same number of "double quick" samples
+ # will fall into each time range.
+ ProcessCommandOutputLines(times_cmd, per_cpu, CalcTimeRangesByCPU, cpu_time_ranges, max_time)
+
+ for cpu_time_range in cpu_time_ranges:
+ if cpu_time_range.sample_cnt:
+ IntersectTimeRanges(cpu_time_range.time_ranges, time_ranges)
+
+ return [cpu_time_ranges[cpu].time_ranges for cpu in cpus]
+
+def SplitSingleTimeRangeIntoN(time_range, n):
+ if n <= 1:
+ return [time_range]
+ start = time_range[0]
+ end = time_range[1]
+ duration = int((end - start + 1) / n)
+ if duration < 1:
+ return [time_range]
+ time_ranges = []
+ for i in range(n):
+ time_ranges.append([start, start + duration - 1])
+ start += duration
+ time_ranges[-1][1] = end
+ return time_ranges
+
+def TimeRangeDuration(r):
+ return r[1] - r[0] + 1
+
+def TotalDuration(time_ranges):
+ duration = 0
+ for r in time_ranges:
+ duration += TimeRangeDuration(r)
+ return duration
+
+def SplitTimeRangesByInterval(time_ranges, interval):
+ new_ranges = []
+ for r in time_ranges:
+ duration = TimeRangeDuration(r)
+ n = duration / interval
+ n = int(round(n, 0))
+ new_ranges += SplitSingleTimeRangeIntoN(r, n)
+ return new_ranges
+
+def SplitTimeRangesIntoN(time_ranges, n, min_interval):
+ if n <= len(time_ranges):
+ return time_ranges
+ duration = TotalDuration(time_ranges)
+ interval = duration / n
+ if interval < min_interval:
+ interval = min_interval
+ return SplitTimeRangesByInterval(time_ranges, interval)
+
+def RecombineTimeRanges(tr):
+ new_tr = copy.deepcopy(tr)
+ n = len(new_tr)
+ i = 1
+ while i < len(new_tr):
+ # if prev end + 1 == cur start, combine them
+ if new_tr[i - 1][1] + 1 == new_tr[i][0]:
+ new_tr[i][0] = new_tr[i - 1][0]
+ del new_tr[i - 1]
+ else:
+ i += 1
+ return new_tr
+
+def OpenTimeRangeEnds(time_ranges, min_time, max_time):
+ if time_ranges[0][0] <= min_time:
+ time_ranges[0][0] = None
+ if time_ranges[-1][1] >= max_time:
+ time_ranges[-1][1] = None
+
+def BadTimeStr(time_str):
+ raise Exception(f"perf command bad time option: '{time_str}'\nCheck also 'time of first sample' and 'time of last sample' in perf script --header-only")
+
+def ValidateTimeRanges(time_ranges, time_str):
+ n = len(time_ranges)
+ for i in range(n):
+ start = time_ranges[i][0]
+ end = time_ranges[i][1]
+ if i != 0 and start <= time_ranges[i - 1][1]:
+ BadTimeStr(time_str)
+ if start > end:
+ BadTimeStr(time_str)
+
+def TimeVal(s, dflt):
+ s = s.strip()
+ if s == "":
+ return dflt
+ a = s.split(".")
+ if len(a) > 2:
+ raise Exception(f"Bad time value'{s}'")
+ x = int(a[0])
+ if x < 0:
+ raise Exception("Negative time not allowed")
+ x *= 1000000000
+ if len(a) > 1:
+ x += int((a[1] + "000000000")[:9])
+ return x
+
+def BadCPUStr(cpu_str):
+ raise Exception(f"perf command bad cpu option: '{cpu_str}'\nCheck also 'nrcpus avail' in perf script --header-only")
+
+def ParseTimeStr(time_str, min_time, max_time):
+ if time_str == None or time_str == "":
+ return [[min_time, max_time]]
+ time_ranges = []
+ for r in time_str.split():
+ a = r.split(",")
+ if len(a) != 2:
+ BadTimeStr(time_str)
+ try:
+ start = TimeVal(a[0], min_time)
+ end = TimeVal(a[1], max_time)
+ except:
+ BadTimeStr(time_str)
+ time_ranges.append([start, end])
+ ValidateTimeRanges(time_ranges, time_str)
+ return time_ranges
+
+def ParseCPUStr(cpu_str, nr_cpus):
+ if cpu_str == None or cpu_str == "":
+ return [-1]
+ cpus = []
+ for r in cpu_str.split(","):
+ a = r.split("-")
+ if len(a) < 1 or len(a) > 2:
+ BadCPUStr(cpu_str)
+ try:
+ start = int(a[0].strip())
+ if len(a) > 1:
+ end = int(a[1].strip())
+ else:
+ end = start
+ except:
+ BadCPUStr(cpu_str)
+ if start < 0 or end < 0 or end < start or end >= nr_cpus:
+ BadCPUStr(cpu_str)
+ cpus.extend(range(start, end + 1))
+ cpus = list(set(cpus)) # Remove duplicates
+ cpus.sort()
+ return cpus
+
+class ParallelPerf():
+
+ def __init__(self, a):
+ for arg_name in vars(a):
+ setattr(self, arg_name, getattr(a, arg_name))
+ self.orig_nr = self.nr
+ self.orig_cmd = list(self.cmd)
+ self.perf = self.cmd[0]
+ if os.path.exists(self.output_dir):
+ raise Exception(f"Output '{self.output_dir}' already exists")
+ if self.jobs < 0 or self.nr < 0 or self.interval < 0:
+ raise Exception("Bad options (negative values): try -h option for help")
+ if self.nr != 0 and self.interval != 0:
+ raise Exception("Cannot specify number of time subdivisions and time interval")
+ if self.jobs == 0:
+ self.jobs = NumberOfCPUs()
+ if self.nr == 0 and self.interval == 0:
+ if self.per_cpu:
+ self.nr = 1
+ else:
+ self.nr = self.jobs
+
+ def Init(self):
+ if self.verbosity.debug:
+ print("cmd", self.cmd)
+ self.file_name = DetermineInputFileName(self.cmd)
+ self.hdr = ReadHeader(self.perf, self.file_name)
+ self.hdr_dict = ParseHeader(self.hdr)
+ self.cmd_line = HeaderField(self.hdr_dict, "cmdline")
+
+ def ExtractTimeInfo(self):
+ self.min_time = TimeVal(HeaderField(self.hdr_dict, "time of first sample"), 0)
+ self.max_time = TimeVal(HeaderField(self.hdr_dict, "time of last sample"), 0)
+ self.time_str = ExtractPerfOption(self.cmd, "", "time")
+ self.time_ranges = ParseTimeStr(self.time_str, self.min_time, self.max_time)
+ if self.verbosity.debug:
+ print("time_ranges", self.time_ranges)
+
+ def ExtractCPUInfo(self):
+ if self.per_cpu:
+ nr_cpus = int(HeaderField(self.hdr_dict, "nrcpus avail"))
+ self.cpu_str = ExtractPerfOption(self.cmd, "C", "cpu")
+ if self.cpu_str == None or self.cpu_str == "":
+ self.cpus = [ x for x in range(nr_cpus) ]
+ else:
+ self.cpus = ParseCPUStr(self.cpu_str, nr_cpus)
+ else:
+ self.cpu_str = None
+ self.cpus = [-1]
+ if self.verbosity.debug:
+ print("cpus", self.cpus)
+
+ def IsIntelPT(self):
+ return self.cmd_line.find("intel_pt") >= 0
+
+ def SplitTimeRanges(self):
+ if self.IsIntelPT() and self.interval == 0:
+ self.split_time_ranges_for_each_cpu = \
+ SplitTimeRangesByTraceDataDensity(self.time_ranges, self.cpus, self.orig_nr,
+ self.orig_cmd, self.file_name, self.per_cpu,
+ self.min_size, self.min_interval, self.verbosity)
+ elif self.nr:
+ self.split_time_ranges_for_each_cpu = [ SplitTimeRangesIntoN(self.time_ranges, self.nr, self.min_interval) ]
+ else:
+ self.split_time_ranges_for_each_cpu = [ SplitTimeRangesByInterval(self.time_ranges, self.interval) ]
+
+ def CheckTimeRanges(self):
+ for tr in self.split_time_ranges_for_each_cpu:
+ # Re-combined time ranges should be the same
+ new_tr = RecombineTimeRanges(tr)
+ if new_tr != self.time_ranges:
+ if self.verbosity.debug:
+ print("tr", tr)
+ print("new_tr", new_tr)
+ raise Exception("Self test failed!")
+
+ def OpenTimeRangeEnds(self):
+ for time_ranges in self.split_time_ranges_for_each_cpu:
+ OpenTimeRangeEnds(time_ranges, self.min_time, self.max_time)
+
+ def CreateWorkList(self):
+ self.worklist = CreateWorkList(self.cmd, self.pipe_to, self.output_dir, self.cpus, self.split_time_ranges_for_each_cpu)
+
+ def PerfDataRecordedPerCPU(self):
+ if "--per-thread" in self.cmd_line.split():
+ return False
+ return True
+
+ def DefaultToPerCPU(self):
+ # --no-per-cpu option takes precedence
+ if self.no_per_cpu:
+ return False
+ if not self.PerfDataRecordedPerCPU():
+ return False
+ # Default to per-cpu for Intel PT data that was recorded per-cpu,
+ # because decoding can be done for each CPU separately.
+ if self.IsIntelPT():
+ return True
+ return False
+
+ def Config(self):
+ self.Init()
+ self.ExtractTimeInfo()
+ if not self.per_cpu:
+ self.per_cpu = self.DefaultToPerCPU()
+ if self.verbosity.debug:
+ print("per_cpu", self.per_cpu)
+ self.ExtractCPUInfo()
+ self.SplitTimeRanges()
+ if self.verbosity.self_test:
+ self.CheckTimeRanges()
+ # Prefer open-ended time range to starting / ending with min_time / max_time resp.
+ self.OpenTimeRangeEnds()
+ self.CreateWorkList()
+
+ def Run(self):
+ if self.dry_run:
+ print(len(self.worklist),"jobs:")
+ for w in self.worklist:
+ print(w.Command())
+ return True
+ result = RunWork(self.worklist, self.jobs, verbosity=self.verbosity)
+ if self.verbosity.verbose:
+ print(glb_prog_name, "done")
+ return result
+
+def RunParallelPerf(a):
+ pp = ParallelPerf(a)
+ pp.Config()
+ return pp.Run()
+
+def Main(args):
+ ap = argparse.ArgumentParser(
+ prog=glb_prog_name, formatter_class = argparse.RawDescriptionHelpFormatter,
+ description =
+"""
+Run a perf script command multiple times in parallel, using perf script options
+--cpu and --time so that each job processes a different chunk of the data.
+""",
+ epilog =
+"""
+Follow the options by '--' and then the perf script command e.g.
+
+ $ perf record -a -- sleep 10
+ $ parallel-perf.py --nr=4 -- perf script --ns
+ All jobs finished successfully
+ $ tree parallel-perf-output/
+ parallel-perf-output/
+ ├── time-range-0
+ │   ├── cmd.txt
+ │   └── out.txt
+ ├── time-range-1
+ │   ├── cmd.txt
+ │   └── out.txt
+ ├── time-range-2
+ │   ├── cmd.txt
+ │   └── out.txt
+ └── time-range-3
+ ├── cmd.txt
+ └── out.txt
+ $ find parallel-perf-output -name cmd.txt | sort | xargs grep -H .
+ parallel-perf-output/time-range-0/cmd.txt:perf script --time=,9466.504461499 --ns
+ parallel-perf-output/time-range-1/cmd.txt:perf script --time=9466.504461500,9469.005396999 --ns
+ parallel-perf-output/time-range-2/cmd.txt:perf script --time=9469.005397000,9471.506332499 --ns
+ parallel-perf-output/time-range-3/cmd.txt:perf script --time=9471.506332500, --ns
+
+Any perf script command can be used, including the use of perf script options
+--dlfilter and --script, so that the benefit of running parallel jobs
+naturally extends to them also.
+
+If option --pipe-to is used, standard output is first piped through that
+command. Beware, if the command fails (e.g. grep with no matches), it will be
+considered a fatal error.
+
+Final standard output is redirected to files named out.txt in separate
+subdirectories under the output directory. Similarly, standard error is
+written to files named err.txt. In addition, files named cmd.txt contain the
+corresponding perf script command. After processing, err.txt files are removed
+if they are empty.
+
+If any job exits with a non-zero exit code, then all jobs are killed and no
+more are started. A message is printed if any job results in a non-empty
+err.txt file.
+
+There is a separate output subdirectory for each time range. If the --per-cpu
+option is used, these are further grouped under cpu-n subdirectories, e.g.
+
+ $ parallel-perf.py --per-cpu --nr=2 -- perf script --ns --cpu=0,1
+ All jobs finished successfully
+ $ tree parallel-perf-output
+ parallel-perf-output/
+ ├── cpu-0
+ │   ├── time-range-0
+ │   │   ├── cmd.txt
+ │   │   └── out.txt
+ │   └── time-range-1
+ │   ├── cmd.txt
+ │   └── out.txt
+ └── cpu-1
+ ├── time-range-0
+ │   ├── cmd.txt
+ │   └── out.txt
+ └── time-range-1
+ ├── cmd.txt
+ └── out.txt
+ $ find parallel-perf-output -name cmd.txt | sort | xargs grep -H .
+ parallel-perf-output/cpu-0/time-range-0/cmd.txt:perf script --cpu=0 --time=,9469.005396999 --ns
+ parallel-perf-output/cpu-0/time-range-1/cmd.txt:perf script --cpu=0 --time=9469.005397000, --ns
+ parallel-perf-output/cpu-1/time-range-0/cmd.txt:perf script --cpu=1 --time=,9469.005396999 --ns
+ parallel-perf-output/cpu-1/time-range-1/cmd.txt:perf script --cpu=1 --time=9469.005397000, --ns
+
+Subdivisions of time range, and cpus if the --per-cpu option is used, are
+expressed by the --time and --cpu perf script options respectively. If the
+supplied perf script command has a --time option, then that time range is
+subdivided, otherwise the time range given by 'time of first sample' to
+'time of last sample' is used (refer perf script --header-only). Similarly, the
+supplied perf script command may provide a --cpu option, and only those CPUs
+will be processed.
+
+To prevent time intervals becoming too small, the --min-interval option can
+be used.
+
+Note there is special handling for processing Intel PT traces. If an interval is
+not specified and the perf record command contained the intel_pt event, then the
+time range will be subdivided in order to produce subdivisions that contain
+approximately the same amount of trace data. That is accomplished by counting
+double-quick (--itrace=qqi) samples, and choosing time ranges that encompass
+approximately the same number of samples. In that case, time ranges may not be
+the same for each CPU processed. For Intel PT, --per-cpu is the default, but
+that can be overridden by --no-per-cpu. Note, for Intel PT, double-quick
+decoding produces 1 sample for each PSB synchronization packet, which in turn
+come after a certain number of bytes output, determined by psb_period (refer
+perf Intel PT documentation). The minimum number of double-quick samples that
+will define a time range can be set by the --min_size option, which defaults to
+64.
+""")
+ ap.add_argument("-o", "--output-dir", default="parallel-perf-output", help="output directory (default 'parallel-perf-output')")
+ ap.add_argument("-j", "--jobs", type=int, default=0, help="maximum number of jobs to run in parallel at one time (default is the number of CPUs)")
+ ap.add_argument("-n", "--nr", type=int, default=0, help="number of time subdivisions (default is the number of jobs)")
+ ap.add_argument("-i", "--interval", type=float, default=0, help="subdivide the time range using this time interval (in seconds e.g. 0.1 for a tenth of a second)")
+ ap.add_argument("-c", "--per-cpu", action="store_true", help="process data for each CPU in parallel")
+ ap.add_argument("-m", "--min-interval", type=float, default=glb_min_interval, help=f"minimum interval (default {glb_min_interval} seconds)")
+ ap.add_argument("-p", "--pipe-to", help="command to pipe output to (optional)")
+ ap.add_argument("-N", "--no-per-cpu", action="store_true", help="do not process data for each CPU in parallel")
+ ap.add_argument("-b", "--min_size", type=int, default=glb_min_samples, help="minimum data size (for Intel PT in PSBs)")
+ ap.add_argument("-D", "--dry-run", action="store_true", help="do not run any jobs, just show the perf script commands")
+ ap.add_argument("-q", "--quiet", action="store_true", help="do not print any messages except errors")
+ ap.add_argument("-v", "--verbose", action="store_true", help="print more messages")
+ ap.add_argument("-d", "--debug", action="store_true", help="print debugging messages")
+ cmd_line = list(args)
+ try:
+ split_pos = cmd_line.index("--")
+ cmd = cmd_line[split_pos + 1:]
+ args = cmd_line[:split_pos]
+ except:
+ cmd = None
+ args = cmd_line
+ a = ap.parse_args(args=args[1:])
+ a.cmd = cmd
+ a.verbosity = Verbosity(a.quiet, a.verbose, a.debug)
+ try:
+ if a.cmd == None:
+ if len(args) <= 1:
+ ap.print_help()
+ return True
+ raise Exception("Command line must contain '--' before perf command")
+ return RunParallelPerf(a)
+ except Exception as e:
+ print("Fatal error: ", str(e))
+ if a.debug:
+ raise
+ return False
+
+if __name__ == "__main__":
+ if not Main(sys.argv):
+ sys.exit(1)