# Coverage for dwarfutils/objdump.py: 92%
#
# Hot-keys on this page (coverage report viewer):
#   r m x p   toggle line displays
#   j k       next/prev highlighted chunk
#   0 (zero)  top of page
#   1 (one)   first highlighted chunk

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 3 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with GCC; see the file COPYING3. If not see

# <http://www.gnu.org/licenses/>.

# objdump-based DWARF parser

# TODO: for now, this assumes that there is only one compilation unit per

# object file. This should be implemented later if needed.

import re

import subprocess

import dwarfutils

from dwarfutils.data import Abbrev, CompilationUnit, Defer, DIE, Exprloc

from dwarfutils.helpers import as_ascii

# Regular expressions used to recognize the lines of the dump tool's textual
# output. All of them are applied with `.match`, i.e. anchored at the start
# of the string they are given.

# --- Abbreviation dump -----------------------------------------------------

# Header of an abbreviation: number, tag name and a bracketed children flag,
# for instance "   1      DW_TAG_compile_unit    [has children]".
abbrev_tag_re = re.compile(
    r'\s+(?P<number>\d+)'
    r'\s+(?P<tag>DW_TAG_[a-zA-Z0-9_]+)'
    r'\s+\[(?P<has_children>.*)\]'
)

# One attribute specification in an abbreviation. Both the attribute and the
# form are either symbolic ("DW_AT_name") or numeric ("DW_AT value: 0").
attr_re = re.compile(
    r'\s+(?P<attr>DW_AT(_[a-zA-Z0-9_]+| value: \d+))'
    r'\s+(?P<form>DW_FORM(_[a-zA-Z0-9_]+| value: \d+))'
)

# --- Info dump -------------------------------------------------------------

# First line of a compilation unit, carrying its hexadecimal offset.
compilation_unit_re = re.compile(
    r'\s+Compilation Unit @ offset'
    r' (?P<offset>0x[0-9a-f]+):'
)

# "Name: value" line of a compilation unit header, such as
# "   Length:        0x4e (32-bit)".
compilation_unit_attr_re = re.compile(
    r'\s+(?P<name>[A-Z][a-zA-Z ]*):'
    r'\s+(?P<value>.*)'
)

# First line of a DIE: " <level><offset>: Abbrev Number: N (DW_TAG_xxx)".
# The parenthesized tag is optional.
die_re = re.compile(
    r'\s+<(?P<level>\d+)>'
    r'<(?P<offset>[0-9a-f]+)>:'
    r' Abbrev Number: (?P<abbrev_number>\d+)'
    r'( \((?P<tag>DW_TAG_[a-zA-Z0-9_]+)\))?'
)

# Attribute of a DIE: "    <offset>   DW_AT_xxx    : value".
die_attr_re = re.compile(
    r'\s+<(?P<offset>[0-9a-f]+)>'
    r'\s+(?P<attr>DW_AT_[a-zA-Z0-9_]+)'
    r'\s*: (?P<value>.*)'
)

# --- Attribute values ------------------------------------------------------

# "(indirect string, offset: 0x...): the string itself"
indirect_string_re = re.compile(
    r'\(indirect string, offset: 0x[0-9a-f]+\):'
    r' (?P<value>.*)'
)

# "<number> (<symbolic name>)"
language_re = re.compile(r'(?P<number>\d+)\s+\((?P<name>.*)\)')

# "N byte block: <hexadecimal bytes>"
block_re = re.compile(r'\d+ byte block: (?P<value>[0-9a-f ]+)')

# Same as a block, followed by the parenthesized decoded expression.
loc_expr_re = re.compile(
    r'\d+ byte block:'
    r' (?P<bytes>[0-9a-f ]+)'
    r'\s+\((?P<expr>.*)\)'
)

def command_output(argv):  # no-coverage
    """
    Run a command and return what it wrote on its standard output.

    Stand-in for `subprocess.check_output`: that function only appeared in
    Python 2.7 and Python 2.6 interpreters must still be supported.

    :param list[str] argv: Command line to run.
    :return: The captured standard output, exactly as handed back by
        `Popen.communicate`.
    :raise RuntimeError: If the command wrote anything on its standard error
        stream, or if it exited with a non-zero status. A diagnostic is
        printed before raising.
    """
    program = argv[0]
    process = subprocess.Popen(argv,
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    out, err = process.communicate()

    # Anything on stderr counts as a failure, whatever the exit status is
    if err:
        print('Running {0} gave a non-empty stderr:'.format(program))
        print(err)
        raise RuntimeError('non-empty stderr')

    status = process.returncode
    if status != 0:
        print('Running {0} gave a non-zero return code ({1})'.format(
            program, status
        ))
        raise RuntimeError('non-zero return code')

    return out

class Objdump(object):  # no-coverage
    """
    Thin wrapper that runs the DWARF dump tool on a single object file.
    """

    def __init__(self, object_file):
        """
        :param str object_file: Name of the object file to dump.
        """
        self.object_file = object_file

    def _run(self, part):
        """
        Dump one DWARF section and return the resulting lines.

        Blank lines are dropped; the remaining ones go through `as_ascii` and
        have their trailing whitespace removed.

        :param str part: Section selector, appended to `--dwarf=`.
        :rtype: list[str]
        """
        argv = [dwarfutils.DWARF_DUMP_TOOL,
                '--dwarf=' + part,
                self.object_file]
        kept = []
        for raw_line in command_output(argv).splitlines():
            if raw_line.strip():
                kept.append(as_ascii(raw_line).rstrip())
        return kept

    def get_info(self):
        """Return the lines of the `--dwarf=info` dump."""
        return self._run('info')

    def get_abbrev(self):
        """Return the lines of the `--dwarf=abbrev` dump."""
        return self._run('abbrev')

def parse_dwarf(object_file):  # no-coverage
    """
    Implementation of dwarfutils.parse_dwarf for objdump.

    Dump the debug information of `object_file` and parse the list of
    compilation units found in it.

    :param str object_file: Name of the object file to process.
    :rtype: list[CompilationUnit]
    """
    runner = Objdump(object_file)
    return _parse_dwarf(runner)

def parse_abbrevs(object_file):  # no-coverage
    """
    Dump the abbreviation table of `object_file` and parse the list of
    abbreviations found in it.

    :param str object_file: Name of the object file to process.
    :rtype: list[Abbrev]
    """
    runner = Objdump(object_file)
    return _parse_abbrevs(runner)

def _parse_dwarf(objdump):
    """
    Parse the compilation units described by an "info" dump.

    The abbreviations are parsed first (see `_parse_abbrevs`), then the lines
    returned by `objdump.get_info()` are processed one by one. Each line is
    either the start of a compilation unit (followed by its "Name: value"
    header lines), the start of a DIE, or an attribute of the most recently
    started DIE. Lines that match none of these are accepted only before the
    first compilation unit.

    :param objdump: Object providing `get_info()` and `get_abbrev()`, each
        returning the list of lines of the corresponding dump (for instance
        an `Objdump` instance).
    :rtype: list[CompilationUnit]
    :raise AssertionError: If the dump does not have the expected structure.
    :raise KeyError: If a compilation unit header lacks one of the "Length",
        "Version", "Abbrev Offset" or "Pointer Size" entries.
    :raise ValueError: If an attribute value cannot be decoded (the offending
        attribute is printed before the exception is propagated).
    """
    abbrevs = _parse_abbrevs(objdump)
    lines = objdump.get_info()

    # Cursor in `lines`. It lives in a one-element list so that the nested
    # function below can update it without `nonlocal`.
    i = [0]

    def next_line():
        """Consume and return the next line, or None when none is left."""
        if i[0] >= len(lines):
            return None
        i[0] += 1
        return lines[i[0] - 1]

    result = []

    # DIEs that are currently "open", i.e. that can still receive children.
    # The innermost one is last.
    die_stack = []

    # DIE that the next attribute lines belong to
    last_die = None

    while True:
        line = next_line()
        if line is None:
            break

        # Try to match the beginning of a compilation unit
        m = compilation_unit_re.match(line)
        if m:
            offset = int(m.group('offset'), 16)

            # Attributes of a previous unit must not be attached to this one
            last_die = None

            attrs = {}
            while True:
                attr_line = next_line()
                if attr_line is None:
                    # The dump stops inside the header: no line was consumed,
                    # so there is nothing to give back.
                    break
                m = compilation_unit_attr_re.match(attr_line)
                if not m:
                    # This line is not part of the header: give it back so
                    # that the main loop processes it.
                    i[0] -= 1
                    break
                attrs[m.group('name')] = m.group('value')

            # The length entry is made of two tokens: the hexadecimal length
            # and a format marker.
            length, is_32bit = attrs['Length'].split()
            length = int(length, 16)
            is_32bit = is_32bit == '(32-bit)'
            version = int(attrs['Version'])
            abbrev_offset = int(attrs['Abbrev Offset'], 16)
            pointer_size = int(attrs['Pointer Size'])

            assert abbrev_offset == 0, ('Multiple compilations unit are not'
                                        ' handled for now')
            abbrevs_sublist = list(abbrevs)

            result.append(CompilationUnit(offset, length, is_32bit, version,
                                          abbrevs_sublist, pointer_size))
            continue

        # Try to match the beginning of a DIE
        m = die_re.match(line)
        if m:
            assert result, 'Invalid DIE: missing containing compilation unit'
            cu = result[-1]

            level = int(m.group('level'))
            offset = int(m.group('offset'), 16)
            abbrev_number = int(m.group('abbrev_number'))
            tag = m.group('tag')

            assert level == len(die_stack)

            # The end of child list is represented as a special DIE with
            # abbreviation number 0.
            if tag is None:
                assert abbrev_number == 0
                die_stack.pop()
                continue

            die = DIE(cu, level, offset, abbrev_number)
            last_die = die
            assert die.tag == tag, 'Unexpected tag for {0}: got {1}'.format(
                die, tag
            )
            if die_stack:
                die_stack[-1].add_child(die)
            else:
                cu.set_root(die)
            if die.has_children:
                die_stack.append(die)
            continue

        # Try to match an attribute
        m = die_attr_re.match(line)
        if m:
            # Attributes belong to the DIE that was started last. This DIE is
            # not on the stack when it has no children, so check `last_die`
            # rather than `die_stack`.
            assert last_die is not None, (
                'Invalid attribute: missing containing DIE'
            )
            die = last_die

            offset = int(m.group('offset'), 16)
            name = m.group('attr')
            value = m.group('value')

            form = die.next_attribute_form(name)
            value_decoder = value_decoders.get(form)

            # Values whose form has no decoder are kept as raw text
            if value_decoder is not None:
                try:
                    value = value_decoder(die, name, form, offset, value)
                except ValueError:
                    print('Error while decoding {0} ({1}) at {2:#x}:'
                          ' {3}'.format(name, form, offset, value))
                    raise

            die.add_attribute(name, form, offset, value)
            continue

        # Otherwise, we must be processing "header" text before the dump
        # itself: just discard it.
        assert not result, 'Unhandled output: ' + line

    return result

def _parse_abbrevs(objdump):
    """
    Parse the abbreviations described by an "abbrev" dump.

    Each line returned by `objdump.get_abbrev()` is either the header of a
    new abbreviation or an attribute specification for the abbreviation
    started last. Lines that match neither are accepted only before the first
    abbreviation.

    :param objdump: Object providing `get_abbrev()`, which returns the list
        of lines of the abbreviation dump (for instance an `Objdump`
        instance).
    :rtype: list[Abbrev]
    :raise AssertionError: If the dump does not have the expected structure.
    """
    result = []

    for line in objdump.get_abbrev():

        # Try to match a new abbreviation
        m = abbrev_tag_re.match(line)
        if m:
            number = int(m.group('number'))
            tag = m.group('tag')
            has_children = m.group('has_children')
            assert has_children in ('has children', 'no children')
            has_children = has_children == 'has children'

            result.append(Abbrev(number, tag, has_children))
            continue

        # Try to match an attribute
        m = attr_re.match(line)
        if m:
            assert result, 'Invalid attribute: missing containing abbreviation'
            name = m.group('attr')
            form = m.group('form')

            # When objdump finds unknown attribute numbers or unknown form
            # numbers, it cannot turn them into names: it prints the number
            # instead, which is what we keep.
            if name.startswith('DW_AT value'):
                name = int(name.split()[-1])
            if form.startswith('DW_FORM value'):
                form = int(form.split()[-1])

            # The (0, 0) couple marks the end of the attribute list
            if name != 0 or form != 0:
                result[-1].add_attribute(name, form)
            continue

        # Otherwise, we must be processing "header" text before the dump
        # itself: just discard it.
        assert not result, 'Unhandled output: ' + line

    return result

# Decoders for attribute values

def _decode_flag_present(die, name, form, offset, value):

return True

def _decode_flag(die, name, form, offset, value):

return bool(int(value))

def _decode_data(die, name, form, offset, value):

if name == 'DW_AT_language':

m = language_re.match(value)

assert m, 'Unhandled language value: {0}'.format(value)

return m.group('name')

elif name == 'DW_AT_encoding':

m = language_re.match(value)

assert m, 'Unhandled encoding value: {0}'.format(value)

return m.group('name')

return int(value, 16) if value.startswith('0x') else int(value)

def _decode_ref(die, name, form, offset, value):
    """
    Decoder for references to other DIEs.

    `value` must be an hexadecimal offset enclosed in angle brackets, such as
    "<0x2d>". The lookup itself (`die.cu.get`) is wrapped in a `Defer` rather
    than performed here.

    :rtype: Defer
    :raise AssertionError: If `value` is not enclosed in angle brackets.
    """
    first, last = value[0], value[-1]
    assert first == '<' and last == '>'
    target = int(value[1:-1], 16)
    return Defer(lambda: die.cu.get(target))

def _decode_indirect_string(die, name, form, offset, value):
    """
    Decoder for DW_FORM_strp: strip the "(indirect string, offset: 0x...): "
    prefix and return the string that follows it.

    :rtype: str
    :raise AssertionError: If `value` does not start with this prefix.
    """
    match = indirect_string_re.match(value)
    assert match, 'Unhandled indirect string: ' + value
    return match.group('value')

def _decode_block(die, name, form, offset, value, no_exprloc=False):
    """
    Decoder for the block forms.

    Blocks attached to DW_AT_location and DW_AT_data_member_location hold
    location expressions: unless `no_exprloc` is true, they are handed over
    to `_decode_exprloc`. Other blocks are returned as the list of their
    bytes.

    :param bool no_exprloc: If true, never try to decode the block as a
        location expression. `_decode_exprloc` uses it when falling back on
        this function, which prevents the two from calling each other
        endlessly.
    :rtype: list[int]|Exprloc
    :raise AssertionError: If `value` does not look like
        "N byte block: <hexadecimal bytes>".
    """
    if (
        not no_exprloc and
        name in ('DW_AT_location', 'DW_AT_data_member_location')
    ):
        return _decode_exprloc(die, name, form, offset, value)

    m = block_re.match(value)
    assert m, 'Unhandled block value: {0}'.format(value)
    return [int(b, 16) for b in m.group('value').split()]

def _decode_exprloc(die, name, form, offset, value):
    """
    Decoder for location expressions.

    `value` is expected to look like
    "N byte block: <hexadecimal bytes> (<op>; <op>; ...)" where each
    operation is either "<opcode>" or "<opcode>: <operands>". Operations are
    turned into tuples whose first item is the opcode and whose other items
    are the whitespace-separated operands, kept as strings.

    When the parenthesized part is missing, the value is decoded as a plain
    block instead (see `_decode_block`).

    :rtype: Exprloc|list[int]
    :raise AssertionError: If `value` is not even a valid block.
    """
    m = loc_expr_re.match(value)
    if not m:
        # Even though they have the expected DW_FORM_exprloc form, objdump
        # does not decode some location expressions such as DW_AT_byte_size.
        # In this case, return a dummy block decoding instead.
        # TODO: implement raw bytes parsing into expressions instead.
        return _decode_block(die, name, form, offset, value, no_exprloc=True)

    byte_list = [int(b, 16) for b in m.group('bytes').split()]

    operations = []
    for op in m.group('expr').split('; '):
        # Only the first ': ' separates the opcode from its operands
        opcode, separator, operands = op.partition(': ')
        operands = operands.split() if separator else []
        operations.append((opcode, ) + tuple(operands))

    return Exprloc(byte_list, operations)

# Dispatch table: maps the name of a DWARF form to the function that decodes
# the text displayed for values of this form. Forms that are not listed here
# have no decoder.
value_decoders = {
    'DW_FORM_flag_present': _decode_flag_present,
    'DW_FORM_flag': _decode_flag,
}

# Integer-like forms
value_decoders.update(dict.fromkeys(
    ('DW_FORM_addr',
     'DW_FORM_sec_offset',
     'DW_FORM_data1',
     'DW_FORM_data2',
     'DW_FORM_data4',
     'DW_FORM_data8',
     'DW_FORM_sdata',
     'DW_FORM_udata'),
    _decode_data
))

# References to other DIEs
value_decoders.update(dict.fromkeys(
    ('DW_FORM_ref4',
     'DW_FORM_ref8'),
    _decode_ref
))

value_decoders['DW_FORM_strp'] = _decode_indirect_string

# Raw blocks of bytes
value_decoders.update(dict.fromkeys(
    ('DW_FORM_block',
     'DW_FORM_block1',
     'DW_FORM_block2',
     'DW_FORM_block4',
     'DW_FORM_block8'),
    _decode_block
))

value_decoders['DW_FORM_exprloc'] = _decode_exprloc

# TODO: handle all existing forms

# Coverage for dwarfutils/objdump.py: 92%
# 163 statements, 151 run, 12 missing, 30 excluded, 3 partial