diff options
Diffstat (limited to 'gnu/llvm/lldb/examples/python/symbolication.py')
-rwxr-xr-x | gnu/llvm/lldb/examples/python/symbolication.py | 724 |
1 files changed, 724 insertions, 0 deletions
diff --git a/gnu/llvm/lldb/examples/python/symbolication.py b/gnu/llvm/lldb/examples/python/symbolication.py new file mode 100755 index 00000000000..a6daa802cda --- /dev/null +++ b/gnu/llvm/lldb/examples/python/symbolication.py @@ -0,0 +1,724 @@ +#!/usr/bin/python + +#---------------------------------------------------------------------- +# Be sure to add the python path that points to the LLDB shared library. +# +# To use this in the embedded python interpreter using "lldb": +# +# cd /path/containing/crashlog.py +# lldb +# (lldb) script import crashlog +# "crashlog" command installed, type "crashlog --help" for detailed help +# (lldb) crashlog ~/Library/Logs/DiagnosticReports/a.crash +# +# The benefit of running the crashlog command inside lldb in the +# embedded python interpreter is when the command completes, there +# will be a target with all of the files loaded at the locations +# described in the crash log. Only the files that have stack frames +# in the backtrace will be loaded unless the "--load-all" option +# has been specified. This allows users to explore the program in the +# state it was in right at crash time. 
#
# On MacOSX csh, tcsh:
#   ( setenv PYTHONPATH /path/to/LLDB.framework/Resources/Python ; ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash )
#
# On MacOSX sh, bash:
#   PYTHONPATH=/path/to/LLDB.framework/Resources/Python ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash
#----------------------------------------------------------------------

from __future__ import print_function
import lldb
import optparse
import os
import plistlib
import re
import shlex
import sys
import time
import uuid


class Address:
    """Class that represents an address that will be symbolicated"""

    def __init__(self, target, load_addr):
        """Remember the target and load address; everything else is lazy.

        target    -- the lldb target used to resolve the address
        load_addr -- the integer load address this object represents
        """
        self.target = target
        self.load_addr = load_addr  # The load address that this object represents
        # the resolved lldb.SBAddress (if any), named so_addr for
        # section/offset address
        self.so_addr = None
        self.sym_ctx = None  # The cached symbol context for this address
        # Any original textual description of this address to be used as a
        # backup in case symbolication fails
        self.description = None
        self.symbolication = None  # The cached symbolicated string that describes this address
        self.inlined = False

    def __str__(self):
        """Return the hex load address followed by the best description.

        Preference order: the symbolicated string, then the original textual
        description, then the resolved section/offset address.
        """
        s = "%#16.16x" % (self.load_addr)
        if self.symbolication:
            s += " %s" % (self.symbolication)
        elif self.description:
            s += " %s" % (self.description)
        elif self.so_addr:
            s += " %s" % (self.so_addr)
        return s

    def resolve_addr(self):
        """Resolve (once) and return the section/offset address."""
        if self.so_addr is None:
            self.so_addr = self.target.ResolveLoadAddress(self.load_addr)
        return self.so_addr

    def is_inlined(self):
        """Return True if symbolicate() found a containing inlined block."""
        return self.inlined

    def get_symbol_context(self):
        """Resolve (once) and return the symbol context for this address.

        When the load address cannot be resolved, an empty
        lldb.SBSymbolContext is cached and returned instead.
        """
        if self.sym_ctx is None:
            sb_addr = self.resolve_addr()
            if sb_addr:
                self.sym_ctx = self.target.ResolveSymbolContextForAddress(
                    sb_addr, lldb.eSymbolContextEverything)
            else:
                self.sym_ctx = lldb.SBSymbolContext()
        return self.sym_ctx

    def get_instructions(self):
        """Return the instructions of the enclosing function or symbol.

        The function is preferred over the symbol. Returns None when there
        is no valid symbol context.
        """
        sym_ctx = self.get_symbol_context()
        if sym_ctx:
            function = sym_ctx.GetFunction()
            if function:
                return function.GetInstructions(self.target)
            return sym_ctx.GetSymbol().GetInstructions(self.target)
        return None

    def symbolicate(self, verbose=False):
        """Build and cache the symbolicated description of this address.

        verbose -- when True, full module and source file paths are used
                   instead of base names.

        Returns True if the address was symbolicated, False otherwise. The
        result is cached: later calls report the outcome of the first call
        (the "verbose" flag of later calls is ignored).
        """
        if self.symbolication is not None:
            # Already attempted. A failed attempt always leaves an empty
            # string behind, so the cached string's truthiness is the result.
            return bool(self.symbolication)

        self.symbolication = ''
        self.inlined = False
        sym_ctx = self.get_symbol_context()
        if not sym_ctx:
            return False
        module = sym_ctx.GetModule()
        if not module:
            return False

        # Print full source file path in verbose mode
        if verbose:
            self.symbolication += str(module.GetFileSpec()) + '`'
        else:
            self.symbolication += module.GetFileSpec().GetFilename() + '`'
        function_start_load_addr = -1
        function = sym_ctx.GetFunction()
        block = sym_ctx.GetBlock()
        line_entry = sym_ctx.GetLineEntry()
        symbol = sym_ctx.GetSymbol()
        inlined_block = block.GetContainingInlinedBlock()
        if function:
            self.symbolication += function.GetName()

            if inlined_block:
                self.inlined = True
                self.symbolication += ' [inlined] ' + \
                    inlined_block.GetInlinedName()
                # Measure the offset from the start of the inlined block's
                # address range that contains this address, when available.
                block_range_idx = inlined_block.GetRangeIndexForBlockAddress(
                    self.so_addr)
                if block_range_idx < lldb.UINT32_MAX:
                    block_range_start_addr = inlined_block.GetRangeStartAddress(
                        block_range_idx)
                    function_start_load_addr = block_range_start_addr.GetLoadAddress(
                        self.target)
            if function_start_load_addr == -1:
                function_start_load_addr = function.GetStartAddress().GetLoadAddress(self.target)
        elif symbol:
            self.symbolication += symbol.GetName()
            function_start_load_addr = symbol.GetStartAddress().GetLoadAddress(self.target)
        else:
            # Neither a function nor a symbol: discard the module prefix so
            # the failure is visible as an empty symbolication.
            self.symbolication = ''
            return False

        # Dump the offset from the current function or symbol if it
        # is non zero
        function_offset = self.load_addr - function_start_load_addr
        if function_offset > 0:
            self.symbolication += " + %u" % (function_offset)
        elif function_offset < 0:
            self.symbolication += " %i (invalid negative offset, file a bug) " % function_offset

        # Print out any line information if any is available
        if line_entry.GetFileSpec():
            # Print full source file path in verbose mode
            if verbose:
                self.symbolication += ' at %s' % line_entry.GetFileSpec()
            else:
                self.symbolication += ' at %s' % line_entry.GetFileSpec().GetFilename()
            self.symbolication += ':%u' % line_entry.GetLine()
            column = line_entry.GetColumn()
            if column > 0:
                self.symbolication += ':%u' % column
        return True


class Section:
    """Class that represents an load address range"""
    # Raw strings keep "\s" from being treated as a string escape sequence.
    sect_info_regex = re.compile(r'(?P<name>[^=]+)=(?P<range>.*)')
    addr_regex = re.compile(r'^\s*(?P<start>0x[0-9A-Fa-f]+)\s*$')
    range_regex = re.compile(
        r'^\s*(?P<start>0x[0-9A-Fa-f]+)\s*(?P<op>[-+])\s*(?P<end>0x[0-9A-Fa-f]+)\s*$')

    def __init__(self, start_addr=None, end_addr=None, name=None):
        """Create a section covering the half-open range [start_addr, end_addr)."""
        self.start_addr = start_addr
        self.end_addr = end_addr
        self.name = name

    @classmethod
    def InitWithSBTargetAndSBSection(cls, target, section):
        """Create a Section from an lldb section loaded in "target".

        Returns None when the section has no valid load address in the target.
        """
        sect_load_addr = section.GetLoadAddress(target)
        if sect_load_addr != lldb.LLDB_INVALID_ADDRESS:
            obj = cls(
                sect_load_addr,
                sect_load_addr +
                section.size,
                section.name)
            return obj
        else:
            return None

    def contains(self, addr):
        """Return True if "addr" lies in [start_addr, end_addr).

        A section whose start or end is unknown (for example one parsed from
        "<name>=<base>") contains no address; comparing against None would
        raise TypeError on Python 3.
        """
        if self.start_addr is None or self.end_addr is None:
            return False
        return self.start_addr <= addr and addr < self.end_addr

    def set_from_string(self, s):
        """Parse "<name>=<base>", "<name>=<base>-<end>" or "<name>=<base>+<size>".

        Returns True on success. On failure a usage message is printed and
        False is returned.
        """
        match = self.sect_info_regex.match(s)
        if match:
            self.name = match.group('name')
            range_str = match.group('range')
            addr_match = self.addr_regex.match(range_str)
            if addr_match:
                self.start_addr = int(addr_match.group('start'), 16)
                self.end_addr = None
                return True

            range_match = self.range_regex.match(range_str)
            if range_match:
                self.start_addr = int(range_match.group('start'), 16)
                self.end_addr = int(range_match.group('end'), 16)
                op = range_match.group('op')
                if op == '+':
                    # "<base>+<size>": the second number is a size, not an end
                    self.end_addr += self.start_addr
                return True
        print('error: invalid section info string "%s"' % s)
        print('Valid section info formats are:')
        print('Format Example Description')
        print('--------------------- -----------------------------------------------')
        print('<name>=<base> __TEXT=0x123000 Section from base address only')
        print('<name>=<base>-<end> __TEXT=0x123000-0x124000 Section from base address and end address')
        print('<name>=<base>+<size> __TEXT=0x123000+0x1000 Section from base address and size')
        return False

    def __str__(self):
        """Describe the section as name=[start - end), name=start, or name."""
        if self.name:
            if self.end_addr is not None:
                if self.start_addr is not None:
                    return "%s=[0x%16.16x - 0x%16.16x)" % (
                        self.name, self.start_addr, self.end_addr)
            else:
                if self.start_addr is not None:
                    return "%s=0x%16.16x" % (self.name, self.start_addr)
            return self.name
        return "<invalid>"


class Image:
    """A class that represents an executable image and any associated data"""

    def __init__(self, path, uuid=None):
        """Create an image for the file at "path" with an optional UUID."""
        self.path = path
        self.resolved_path = None
        self.resolved = False
        self.unavailable = False
        self.uuid = uuid
        self.section_infos = list()
        self.identifier = None
        self.version = None
        self.arch = None
        self.module = None
        self.symfile = None
        self.slide = None

    @classmethod
    def InitWithSBTargetAndSBModule(cls, target, module):
        '''Initialize this Image object with a module from a target.'''
        obj = cls(module.file.fullpath, module.uuid)
        obj.resolved_path = module.platform_file.fullpath
        obj.resolved = True
        for section in module.sections:
            symb_section = Section.InitWithSBTargetAndSBSection(
                target, section)
            if symb_section:
                obj.section_infos.append(symb_section)
        obj.arch = module.triple
        obj.module = module
        obj.symfile = None
        obj.slide = None
        return obj

    def dump(self, prefix):
        """Print the one-line description of this image after "prefix"."""
        print("%s%s" % (prefix, self))

    def debug_dump(self):
        """Print every attribute of this image, one per line."""
        print('path = "%s"' % (self.path))
        print('resolved_path = "%s"' % (self.resolved_path))
        print('resolved = %i' % (self.resolved))
        print('unavailable = %i' % (self.unavailable))
        print('uuid = %s' % (self.uuid))
        print('section_infos = %s' % (self.section_infos))
        print('identifier = "%s"' % (self.identifier))
        print('version = %s' % (self.version))
        print('arch = %s' % (self.arch))
        print('module = %s' % (self.module))
        print('symfile = "%s"' % (self.symfile))
        if self.slide is None:
            # "%i" cannot format None, and None is the default slide
            print('slide = %s' % (self.slide))
        else:
            print('slide = %i (0x%x)' % (self.slide, self.slide))

    def __str__(self):
        """Return UUID, arch, version, path, sections and slide on one line."""
        s = ''
        if self.uuid:
            s += "%s " % (self.get_uuid())
        if self.arch:
            s += "%s " % (self.arch)
        if self.version:
            s += "%s " % (self.version)
        resolved_path = self.get_resolved_path()
        if resolved_path:
            s += "%s " % (resolved_path)
        for section_info in self.section_infos:
            s += ", %s" % (section_info)
        if self.slide is not None:
            s += ', slide = 0x%16.16x' % self.slide
        return s

    def add_section(self, section):
        """Append a Section describing where part of this image is loaded."""
        # print "added '%s' to '%s'" % (section, self.path)
        self.section_infos.append(section)

    def get_section_containing_load_addr(self, load_addr):
        """Return the first Section containing "load_addr", or None."""
        for section_info in self.section_infos:
            if section_info.contains(load_addr):
                return section_info
        return None

    def get_resolved_path(self):
        """Return the resolved path, falling back to the original path."""
        if self.resolved_path:
            return self.resolved_path
        elif self.path:
            return self.path
        return None

    def get_resolved_path_basename(self):
        """Return the base name of the resolved path, or None."""
        path = self.get_resolved_path()
        if path:
            return os.path.basename(path)
        return None

    def symfile_basename(self):
        """Return the base name of the symbol file, or None."""
        if self.symfile:
            return os.path.basename(self.symfile)
        return None

    def has_section_load_info(self):
        """Return a true value if section infos or a slide are available."""
        return self.section_infos or self.slide is not None

    def load_module(self, target):
        """Set the load addresses of this image's module in "target".

        Section infos are used when present, otherwise the slide is applied
        to the whole module. Returns None on success (or when the image was
        already marked unavailable) and an error string on failure.
        """
        if self.unavailable:
            return None  # We already warned that we couldn't find this module, so don't return an error string
        # Load this module into "target" using the section infos to
        # set the section load addresses
        if not self.has_section_load_info():
            return 'error: no section infos'
        if not target:
            return 'error: invalid target'
        if not self.module:
            return 'error: invalid module'

        if self.section_infos:
            num_sections_loaded = 0
            for section_info in self.section_infos:
                if not section_info.name:
                    # The original code formatted "range.name" here, which
                    # is the builtin and raises AttributeError.
                    return 'error: unable to find "%s" section in "%s"' % (
                        section_info.name, self.get_resolved_path())
                section = self.module.FindSection(section_info.name)
                if not section:
                    return 'error: unable to find the section named "%s"' % section_info.name
                error = target.SetSectionLoadAddress(
                    section, section_info.start_addr)
                if not error.Success():
                    return 'error: %s' % error.GetCString()
                num_sections_loaded += 1
            if num_sections_loaded == 0:
                return 'error: no sections were successfully loaded'
        else:
            err = target.SetModuleLoadAddress(
                self.module, self.slide)
            if err.Fail():
                return err.GetCString()
        return None

    def add_module(self, target):
        '''Add the Image described in this object to "target" and load its
        sections when section load information is available.

        Returns None on success (or when the image is unavailable) and an
        error string on failure.'''
        if target:
            # Try and find using UUID only first so that paths need not match
            # up
            uuid_str = self.get_normalized_uuid_string()
            if uuid_str:
                self.module = target.AddModule(None, None, uuid_str)
            if not self.module:
                self.locate_module_and_debug_symbols()
                if self.unavailable:
                    return None
                resolved_path = self.get_resolved_path()
                self.module = target.AddModule(
                    resolved_path, str(self.arch), uuid_str, self.symfile)
            if not self.module:
                return 'error: unable to get module for (%s) "%s"' % (
                    self.arch, self.get_resolved_path())
            if self.has_section_load_info():
                return self.load_module(target)
            else:
                return None  # No sections, the module was added to the target, so success
        else:
            return 'error: invalid target'

    def locate_module_and_debug_symbols(self):
        """Locate the image and its symbols; returns True on success."""
        # By default, just use the paths that were supplied in:
        # self.path
        # self.resolved_path
        # self.module
        # self.symfile
        # Subclasses can inherit from this class and override this function
        self.resolved = True
        return True

    def get_uuid(self):
        """Return the UUID, taking it from the module when not yet set."""
        if not self.uuid and self.module:
            self.uuid = uuid.UUID(self.module.GetUUIDString())
        return self.uuid

    def get_normalized_uuid_string(self):
        """Return the UUID as an upper-case string, or None if unset."""
        if self.uuid:
            return str(self.uuid).upper()
        return None

    def create_target(self):
        '''Create a target using the information in this Image object.

        Returns the new target, or None (after printing an error) when the
        image is unavailable, cannot be located, or no valid target could
        be created.'''
        if self.unavailable:
            return None

        if self.locate_module_and_debug_symbols():
            resolved_path = self.get_resolved_path()
            path_spec = lldb.SBFileSpec(resolved_path)
            error = lldb.SBError()
            target = lldb.debugger.CreateTarget(
                resolved_path, self.arch, None, False, error)
            if target:
                self.module = target.FindModule(path_spec)
                if self.has_section_load_info():
                    # A load failure is reported but the target is still
                    # returned to the caller.
                    err = self.load_module(target)
                    if err:
                        print('ERROR: ', err)
                return target
            else:
                print('error: unable to create a valid target for (%s) "%s"' % (self.arch, self.path))
        else:
            print('error: unable to locate main executable (%s) "%s"' % (self.arch, self.path))
        return None


class Symbolicator:
    """A class that represents the information needed to symbolicate addresses in a program"""

    def __init__(self):
        """Create a symbolicator with no target and no images."""
        self.target = None
        self.images = list()  # a list of images to be used when symbolicating
        self.addr_mask = 0xffffffffffffffff

    @classmethod
    def InitWithSBTarget(cls, target):
        """Create a Symbolicator with one Image per module in "target"."""
        obj = cls()
        obj.target = target
        obj.images = list()
        triple = target.triple
        if triple:
            arch = triple.split('-')[0]
            if "arm" in arch:
                # Mask that clears bit 0 of an address
                # NOTE(review): presumably to strip the Thumb bit - confirm
                obj.addr_mask = 0xfffffffffffffffe

        for module in target.modules:
            image = Image.InitWithSBTargetAndSBModule(target, module)
            obj.images.append(image)
        return obj

    def __str__(self):
        """Describe the target, its modules and the known images."""
        s = "Symbolicator:\n"
        if self.target:
            s += "Target = '%s'\n" % (self.target)
            s += "Target modules:\n"
            for m in self.target.modules:
                s += str(m) + "\n"
        s += "Images:\n"
        for image in self.images:
            s += ' %s\n' % (image)
        return s

    def find_images_with_identifier(self, identifier):
        """Return the images whose identifier matches "identifier".

        Exact matches are preferred. When there are none, images whose
        identifier ends with ".<identifier>" are returned instead.
        """
        images = list()
        for image in self.images:
            if image.identifier == identifier:
                images.append(image)
        if len(images) == 0 and identifier is not None:
            regex_text = r'^.*\.%s$' % (re.escape(identifier))
            regex = re.compile(regex_text)
            for image in self.images:
                # Images without an identifier cannot match (and would make
                # regex.match() raise TypeError).
                if image.identifier is not None and regex.match(image.identifier):
                    images.append(image)
        return images

    def find_image_containing_load_addr(self, load_addr):
        """Return the first image with a section containing "load_addr"."""
        for image in self.images:
            if image.get_section_containing_load_addr(load_addr):
                return image
        return None

    def create_target(self):
        """Return the current target, creating it from the images if needed.

        The first image that yields a valid target wins. Returns None when
        no image could produce one.
        """
        if self.target:
            return self.target

        if self.images:
            for image in self.images:
                self.target = image.create_target()
                if self.target:
                    if self.target.GetAddressByteSize() == 4:
                        triple = self.target.triple
                        if triple:
                            arch = triple.split('-')[0]
                            if "arm" in arch:
                                self.addr_mask = 0xfffffffffffffffe
                    return self.target
        return None

    def symbolicate(self, load_addr, verbose=False):
        """Symbolicate "load_addr" and any inlined parent scopes.

        Returns a list of Address objects, the first for "load_addr" itself
        followed by one per enclosing inlined scope, or None when the address
        could not be symbolicated or there is no target.
        """
        if not self.target:
            self.create_target()
        if self.target:
            live_process = False
            process = self.target.process
            if process:
                state = process.state
                if state > lldb.eStateUnloaded and state < lldb.eStateDetached:
                    live_process = True
            # If we don't have a live process, we can attempt to find the image
            # that a load address belongs to and lazily load its module in the
            # target, but we shouldn't do any of this if we have a live process
            if not live_process:
                image = self.find_image_containing_load_addr(load_addr)
                if image:
                    image.add_module(self.target)
            symbolicated_address = Address(self.target, load_addr)
            if symbolicated_address.symbolicate(verbose):
                if symbolicated_address.so_addr:
                    symbolicated_addresses = list()
                    symbolicated_addresses.append(symbolicated_address)
                    # See if we were able to reconstruct anything?
                    while True:
                        inlined_parent_so_addr = lldb.SBAddress()
                        inlined_parent_sym_ctx = symbolicated_address.sym_ctx.GetParentOfInlinedScope(
                            symbolicated_address.so_addr, inlined_parent_so_addr)
                        if not inlined_parent_sym_ctx:
                            break
                        if not inlined_parent_so_addr:
                            break

                        symbolicated_address = Address(
                            self.target, inlined_parent_so_addr.GetLoadAddress(
                                self.target))
                        # Seed the caches so symbolicate() describes the
                        # parent scope instead of re-resolving the address.
                        symbolicated_address.sym_ctx = inlined_parent_sym_ctx
                        symbolicated_address.so_addr = inlined_parent_so_addr
                        symbolicated_address.symbolicate(verbose)

                        # push the new frame onto the new frame stack
                        symbolicated_addresses.append(symbolicated_address)

                    if symbolicated_addresses:
                        return symbolicated_addresses
        else:
            print('error: no target in Symbolicator')
        return None


def disassemble_instructions(
        target,
        instructions,
        pc,
        insts_before_pc,
        insts_after_pc,
        non_zeroeth_frame):
    """Print the disassembly around "pc", marking the current instruction.

    target            -- target used to compute load addresses and text
    instructions      -- iterable of lldb instructions (None prints nothing)
    pc                -- load address of the instruction to mark with "->"
    insts_before_pc   -- instructions to show before pc, -1 for all
    insts_after_pc    -- instructions to show after pc, -1 for all
    non_zeroeth_frame -- True to mark the instruction preceding pc instead

    Nothing is printed when no instruction starts exactly at "pc".
    """
    if instructions is None:
        return
    lines = list()
    pc_index = -1
    comment_column = 50
    for inst_idx, inst in enumerate(instructions):
        inst_pc = inst.GetAddress().GetLoadAddress(target)
        if pc == inst_pc:
            pc_index = inst_idx
        mnemonic = inst.GetMnemonic(target)
        operands = inst.GetOperands(target)
        comment = inst.GetComment(target)
        lines.append("%#16.16x: %8s %s" % (inst_pc, mnemonic, operands))
        if comment:
            # Pad so that comments line up at comment_column
            line_len = len(lines[-1])
            if line_len < comment_column:
                lines[-1] += ' ' * (comment_column - line_len)
            lines[-1] += "; %s" % comment

    if pc_index >= 0:
        # If we are disassembling the non-zeroeth frame, we need to backup the
        # PC by 1
        if non_zeroeth_frame and pc_index > 0:
            pc_index = pc_index - 1
        last_idx = len(lines) - 1
        if insts_before_pc == -1:
            start_idx = 0
        else:
            start_idx = max(pc_index - insts_before_pc, 0)
        # The end of the window is governed by insts_after_pc (the original
        # tested insts_before_pc here).
        if insts_after_pc == -1:
            end_idx = last_idx
        else:
            end_idx = min(pc_index + insts_after_pc, last_idx)
        for i in range(start_idx, end_idx + 1):
            if i == pc_index:
                print(' -> ', lines[i])
            else:
                # Same width as the ' -> ' marker so the columns line up
                print('    ', lines[i])


def print_module_section_data(section):
    """Print a section followed by a description of its data, if any."""
    print(section)
    section_data = section.GetSectionData()
    if section_data:
        ostream = lldb.SBStream()
        section_data.GetDescription(ostream, section.GetFileAddress())
        print(ostream.GetData())


def print_module_section(section, depth):
    """Print a section and, recursively, up to "depth" levels of sub-sections."""
    print(section)
    if depth > 0:
        num_sub_sections = section.GetNumSubSections()
        for sect_idx in range(num_sub_sections):
            print_module_section(
                section.GetSubSectionAtIndex(sect_idx), depth - 1)


def print_module_sections(module, depth):
    """Print every section of "module", descending "depth" levels."""
    for sect in module.section_iter():
        print_module_section(sect, depth)


def print_module_symbols(module):
    """Print every symbol obtained by iterating "module"."""
    for sym in module:
        print(sym)


def Symbolicate(command_args):
    """Parse "command_args" and print the symbolication of each address.

    command_args -- list of option and address strings (see --help)

    Returns None. Invalid options (or --help) make the function return
    right after optparse has printed its message.
    """

    usage = "usage: %prog [options] <addr1> [addr2 ...]"
    description = '''Symbolicate one or more addresses using LLDB's python scripting API..'''
    parser = optparse.OptionParser(
        description=description,
        prog='crashlog.py',
        usage=usage)
    parser.add_option(
        '-v',
        '--verbose',
        action='store_true',
        dest='verbose',
        help='display verbose debug info',
        default=False)
    parser.add_option(
        '-p',
        '--platform',
        type='string',
        metavar='platform',
        dest='platform',
        help='Specify the platform to use when creating the debug target. Valid values include "localhost", "darwin-kernel", "ios-simulator", "remote-freebsd", "remote-macosx", "remote-ios", "remote-linux".')
    parser.add_option(
        '-f',
        '--file',
        type='string',
        metavar='file',
        dest='file',
        help='Specify a file to use when symbolicating')
    parser.add_option(
        '-a',
        '--arch',
        type='string',
        metavar='arch',
        dest='arch',
        help='Specify a architecture to use when symbolicating')
    parser.add_option(
        '-s',
        '--slide',
        type='int',
        metavar='slide',
        dest='slide',
        help='Specify the slide to use on the file specified with the --file option',
        default=None)
    parser.add_option(
        '--section',
        type='string',
        action='append',
        dest='section_strings',
        help='specify <sect-name>=<start-addr> or <sect-name>=<start-addr>-<end-addr>')
    try:
        (options, args) = parser.parse_args(command_args)
    except SystemExit:
        # optparse reports bad options and --help by exiting; swallow only
        # that so an embedding interpreter is not terminated.
        return
    symbolicator = Symbolicator()
    images = list()
    if options.file:
        image = Image(options.file)
        image.arch = options.arch
        # Add any sections that were specified with one or more --section
        # options
        if options.section_strings:
            for section_str in options.section_strings:
                section = Section()
                if section.set_from_string(section_str):
                    image.add_section(section)
                else:
                    sys.exit(1)
        if options.slide is not None:
            image.slide = options.slide
        symbolicator.images.append(image)

    target = symbolicator.create_target()
    if options.verbose:
        print(symbolicator)
    if target:
        for addr_str in args:
            try:
                addr = int(addr_str, 0)
            except ValueError:
                print('error: invalid address "%s"' % addr_str)
                continue
            symbolicated_addrs = symbolicator.symbolicate(
                addr, options.verbose)
            # symbolicate() returns None when nothing could be resolved
            if symbolicated_addrs:
                for symbolicated_addr in symbolicated_addrs:
                    print(symbolicated_addr)
            else:
                print('error: unable to symbolicate address %s' % addr_str)
            print()
    else:
        print('error: no target for %s' % (symbolicator))

if __name__ == '__main__':
    # Create a new debugger instance
    lldb.debugger = lldb.SBDebugger.Create()
    Symbolicate(sys.argv[1:])