Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

# Copyright (C) 2017 Free Software Foundation, Inc. 

 

# This program is free software; you can redistribute it and/or modify 

# it under the terms of the GNU General Public License as published by 

# the Free Software Foundation; either version 3 of the License, or 

# (at your option) any later version. 

# 

# This program is distributed in the hope that it will be useful, 

# but WITHOUT ANY WARRANTY; without even the implied warranty of 

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

# GNU General Public License for more details. 

# 

# You should have received a copy of the GNU General Public License 

# along with GCC; see the file COPYING3. If not see 

# <http://www.gnu.org/licenses/>. 

 

# objdump-based DWARF parser 

 

# TODO: for now, this assumes that there is only one compilation unit per 

# object file. This should be implemented later if needed. 

 

import re 

import subprocess 

 

import dwarfutils 

from dwarfutils.data import Abbrev, CompilationUnit, Defer, DIE, Exprloc 

from dwarfutils.helpers import as_ascii 

 

 

# Patterns for the abbreviation table dump.

# Abbreviation header line: abbreviation number, DW_TAG_* name and the
# bracketed children marker.
abbrev_tag_re = re.compile(
    r'\s+(?P<number>\d+)'
    r'\s+(?P<tag>DW_TAG_[a-zA-Z0-9_]+)'
    r'\s+\[(?P<has_children>.*)\]'
)

# Attribute specification line: either symbolic names (DW_AT_xxx / DW_FORM_xxx)
# or the raw "DW_AT value: N" / "DW_FORM value: N" spelling.
attr_re = re.compile(
    r'\s+(?P<attr>DW_AT(_[a-zA-Z0-9_]+| value: \d+))'
    r'\s+(?P<form>DW_FORM(_[a-zA-Z0-9_]+| value: \d+))'
)

# Patterns for the debug info dump.

# First line of a compilation unit header.
compilation_unit_re = re.compile(
    r'\s+Compilation Unit @ offset (?P<offset>0x[0-9a-f]+):'
)

# "Name: value" line inside a compilation unit header.
compilation_unit_attr_re = re.compile(
    r'\s+(?P<name>[A-Z][a-zA-Z ]*):\s+(?P<value>.*)'
)

# DIE line: nesting level, hexadecimal offset, abbreviation number and, when
# present, the parenthesized DW_TAG_* name.
die_re = re.compile(
    r'\s+<(?P<level>\d+)><(?P<offset>[0-9a-f]+)>:'
    r' Abbrev Number: (?P<abbrev_number>\d+)'
    r'( \((?P<tag>DW_TAG_[a-zA-Z0-9_]+)\))?'
)

# DIE attribute line: hexadecimal offset, DW_AT_* name and raw value text.
die_attr_re = re.compile(
    r'\s+<(?P<offset>[0-9a-f]+)>'
    r'\s+(?P<attr>DW_AT_[a-zA-Z0-9_]+)'
    r'\s*: (?P<value>.*)'
)

# Patterns for attribute values.

# "(indirect string, offset: 0x...): <the string>"
indirect_string_re = re.compile(
    r'\(indirect string, offset: 0x[0-9a-f]+\): (?P<value>.*)'
)

# "<number> (<name>)"
language_re = re.compile(r'(?P<number>\d+)\s+\((?P<name>.*)\)')

# "<N> byte block: <hex bytes>"
block_re = re.compile(r'\d+ byte block: (?P<value>[0-9a-f ]+)')

# Same as block_re, followed by the parenthesized decoded expression.
loc_expr_re = re.compile(
    r'\d+ byte block: (?P<bytes>[0-9a-f ]+)'
    r'\s+\((?P<expr>.*)\)'
)

 

 

def command_output(argv):  # no-coverage
    """
    Run a command and return everything it wrote on its standard output.

    This is a minimal stand-in for `subprocess.check_output`, which only
    appeared in Python 2.7 while Python 2.6 interpreters must be supported.

    :param list[str] argv: Command line to run.
    :rtype: str
    :raise RuntimeError: If the command wrote anything on its standard error
        stream, or if it exited with a non-zero status. A diagnostic is
        printed before raising.
    """
    program = argv[0]
    process = subprocess.Popen(argv,
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    out, err = process.communicate()

    # Any output on stderr is considered a failure, even with a zero status
    if err:
        print('Running {0} gave a non-empty stderr:'.format(program))
        print(err)
        raise RuntimeError('non-empty stderr')

    status = process.returncode
    if status != 0:
        print('Running {0} gave a non-zero return code ({1})'.format(
            program, status
        ))
        raise RuntimeError('non-zero return code')

    return out

 

 

class Objdump(object):  # no-coverage
    """
    Runner for the DWARF dump tool on a given object file.
    """

    def __init__(self, object_file):
        """
        :param object_file: Object file passed to the dump tool on each run.
        """
        self.object_file = object_file

    def _run(self, part):
        """
        Dump the `part` DWARF section and return its non-blank lines.

        :param str part: Section name appended to the `--dwarf=` option.
        :return: Output lines converted with `as_ascii`, with trailing
            whitespace removed and blank lines dropped.
        :rtype: list
        """
        argv = [dwarfutils.DWARF_DUMP_TOOL,
                '--dwarf=' + part,
                self.object_file]
        kept = []
        for raw_line in command_output(argv).splitlines():
            if raw_line.strip():
                kept.append(as_ascii(raw_line).rstrip())
        return kept

    def get_info(self):
        """Run the dump tool with --dwarf=info."""
        return self._run('info')

    def get_abbrev(self):
        """Run the dump tool with --dwarf=abbrev."""
        return self._run('abbrev')

 

 

def parse_dwarf(object_file):  # no-coverage
    """
    Implementation of dwarfutils.parse_dwarf for objdump.

    Run objdump on `object_file` and parse the list of compilation units it
    contains.

    :param str object_file: Name of the object file to process.
    :rtype: list[CompilationUnit]
    """
    dumper = Objdump(object_file)
    return _parse_dwarf(dumper)

 

 

def parse_abbrevs(object_file):  # no-coverage
    """
    Run objdump on `object_file` and parse the abbreviation table it
    contains.

    :param str object_file: Name of the object file to process.
    :rtype: list[Abbrev]
    """
    dumper = Objdump(object_file)
    return _parse_abbrevs(dumper)

 

 

def _parse_dwarf(objdump):
    """
    Parse the debug info dump provided by `objdump` into compilation units.

    The abbreviation table is parsed first (through `_parse_abbrevs`), then
    each line returned by `objdump.get_info()` is classified as a compilation
    unit header, a DIE, or a DIE attribute. Any other line is accepted only
    before the first compilation unit (leading "header" text).

    :param objdump: Dump provider. Its `get_info()` and `get_abbrev()`
        methods must return the lines of the corresponding dumps.
    :rtype: list[CompilationUnit]
    :raise AssertionError: If the dump does not have the expected structure.
    """
    abbrevs = _parse_abbrevs(objdump)

    lines = objdump.get_info()
    line_count = len(lines)

    result = []
    # DIEs (outermost first) whose list of children is still open
    die_stack = []
    # Most recently created DIE: attribute lines are attached to it
    last_die = None

    pos = 0
    while pos < line_count:
        line = lines[pos]
        pos += 1

        # Try to match the beginning of a compilation unit
        m = compilation_unit_re.match(line)
        if m:
            offset = int(m.group('offset'), 16)

            # Consume the "Name: value" header lines. Stop without consuming
            # at the first line that is not one, so that the main loop
            # processes it, and stop cleanly if the dump ends here.
            attrs = {}
            while pos < line_count:
                attr_m = compilation_unit_attr_re.match(lines[pos])
                if not attr_m:
                    break
                attrs[attr_m.group('name')] = attr_m.group('value')
                pos += 1

            # The length is displayed as "<hex> (32-bit)" or similar
            length, is_32bit = attrs['Length'].split()
            length = int(length, 16)
            is_32bit = is_32bit == '(32-bit)'

            version = int(attrs['Version'])
            abbrev_offset = int(attrs['Abbrev Offset'], 16)
            pointer_size = int(attrs['Pointer Size'])

            assert abbrev_offset == 0, ('Multiple compilations unit are not'
                                        ' handled for now')
            abbrevs_sublist = list(abbrevs)

            result.append(CompilationUnit(offset, length, is_32bit, version,
                                          abbrevs_sublist, pointer_size))
            continue

        # Try to match the beginning of a DIE
        m = die_re.match(line)
        if m:
            assert result, 'Invalid DIE: missing containing compilation unit'
            cu = result[-1]

            level = int(m.group('level'))
            offset = int(m.group('offset'), 16)
            abbrev_number = int(m.group('abbrev_number'))
            tag = m.group('tag')

            assert level == len(die_stack)

            # The end of child list is represented as a special DIE with
            # abbreviation number 0.
            if tag is None:
                assert abbrev_number == 0
                die_stack.pop()
                continue

            die = DIE(cu, level, offset, abbrev_number)
            last_die = die
            assert die.tag == tag, 'Unexpected tag for {0}: got {1}'.format(
                die, tag
            )
            if die_stack:
                die_stack[-1].add_child(die)
            else:
                cu.set_root(die)
            if die.has_children:
                die_stack.append(die)
            continue

        # Try to match an attribute
        m = die_attr_re.match(line)
        if m:
            # Attributes belong to the last DIE seen, which is not on the
            # stack when it has no children: check `last_die` itself.
            assert last_die is not None, (
                'Invalid attribute: missing containing DIE'
            )
            die = last_die

            offset = int(m.group('offset'), 16)
            name = m.group('attr')
            value = m.group('value')

            form = die.next_attribute_form(name)
            # Forms without a registered decoder keep their raw text value
            try:
                value_decoder = value_decoders[form]
            except KeyError:
                pass
            else:
                try:
                    value = value_decoder(die, name, form, offset, value)
                except ValueError:
                    print('Error while decoding {0} ({1}) at {2:#x}:'
                          ' {3}'.format(name, form, offset, value))
                    raise
            die.add_attribute(name, form, offset, value)
            continue

        # Otherwise, we must be processing "header" text before the dump
        # itself: just discard it.
        assert not result, 'Unhandled output: ' + line

    return result

 

 

def _parse_abbrevs(objdump):
    """
    Parse the abbreviation table dump provided by `objdump`.

    :param objdump: Dump provider. Its `get_abbrev()` method must return the
        lines of the abbreviation dump.
    :rtype: list[Abbrev]
    :raise AssertionError: If the dump does not have the expected structure.
    """
    abbrevs = []

    for line in objdump.get_abbrev():
        tag_match = abbrev_tag_re.match(line)
        attr_match = None if tag_match else attr_re.match(line)

        if tag_match:
            # Start of a new abbreviation
            number = int(tag_match.group('number'))
            tag = tag_match.group('tag')
            children = tag_match.group('has_children')
            assert children in ('has children', 'no children')
            abbrevs.append(Abbrev(number, tag, children == 'has children'))

        elif attr_match:
            # Attribute specification for the current abbreviation
            assert abbrevs, 'Invalid attribute: missing containing abbreviation'
            name = attr_match.group('attr')
            form = attr_match.group('form')

            # Attribute or form numbers that objdump cannot turn into names
            # are displayed as raw values: keep them as integers.
            if name.startswith('DW_AT value'):
                name = int(name.split()[-1])
            if form.startswith('DW_FORM value'):
                form = int(form.split()[-1])

            # The (0, 0) couple only marks the end of the attribute list
            if not (name == 0 and form == 0):
                abbrevs[-1].add_attribute(name, form)

        else:
            # "Header" text is expected only before the first abbreviation:
            # discard it.
            assert not abbrevs, 'Unhandled output: ' + line

    return abbrevs

 

 

# Decoders for attribute values 

 

def _decode_flag_present(die, name, form, offset, value): 

return True 

 

 

def _decode_flag(die, name, form, offset, value): 

return bool(int(value)) 

 

 

def _decode_data(die, name, form, offset, value): 

if name == 'DW_AT_language': 

m = language_re.match(value) 

assert m, 'Unhandled language value: {0}'.format(value) 

return m.group('name') 

 

elif name == 'DW_AT_encoding': 

m = language_re.match(value) 

assert m, 'Unhandled encoding value: {0}'.format(value) 

return m.group('name') 

 

return int(value, 16) if value.startswith('0x') else int(value) 

 

 

def _decode_ref(die, name, form, offset, value):
    """
    Decode a reference displayed as "<hexadecimal offset>".

    The result is a `Defer` wrapping the lookup of the referenced offset in
    the compilation unit of `die`, so the lookup is not done here.

    :rtype: Defer
    """
    assert value[0] == '<' and value[-1] == '>'
    target_offset = int(value[1:-1], 16)

    def resolve():
        return die.cu.get(target_offset)

    return Defer(resolve)

 

 

def _decode_indirect_string(die, name, form, offset, value):
    """
    Extract the string out of an "(indirect string, offset: 0x...): ..."
    value.

    :rtype: str
    """
    matched = indirect_string_re.match(value)
    assert matched, 'Unhandled indirect string: ' + value
    return matched.group('value')

 

 

def _decode_block(die, name, form, offset, value, no_exprloc=False):
    """
    Decode a "<N> byte block: <hex bytes>" value into a list of byte values.

    Unless `no_exprloc` is true, blocks for DW_AT_location and
    DW_AT_data_member_location attributes are decoded as location
    expressions through `_decode_exprloc` instead.

    :param bool no_exprloc: Whether to always decode as a raw byte list.
    :rtype: list[int]|Exprloc
    """
    is_location = name in ('DW_AT_location', 'DW_AT_data_member_location')
    if is_location and not no_exprloc:
        return _decode_exprloc(die, name, form, offset, value)

    matched = block_re.match(value)
    assert matched, 'Unhandled block value: {0}'.format(value)

    hex_bytes = matched.group('value').split()
    return [int(hex_byte, 16) for hex_byte in hex_bytes]

 

 

def _decode_exprloc(die, name, form, offset, value):
    """
    Decode a "<N> byte block: <hex bytes> (<expression>)" value.

    The expression text is split on "; " into operations, each of which is
    turned into an `(opcode, operand, ...)` tuple of strings: the opcode is
    what comes before the first ": " and operands are the
    whitespace-separated words after it.

    :rtype: Exprloc|list[int]
    """
    matched = loc_expr_re.match(value)
    if matched is None:
        # Even though they have the expected DW_FORM_exploc form, objdump
        # does not decode some location expressions such as DW_AT_byte_size.
        # In this case, return a dummy block decoding instead.
        # TODO: implement raw bytes parsing into expressions instead.
        return _decode_block(die, name, form, offset, value, no_exprloc=True)

    raw_bytes = [int(hex_byte, 16)
                 for hex_byte in matched.group('bytes').split()]

    operations = []
    for op_text in matched.group('expr').split('; '):
        # Without a ": " separator, the operand text is empty and thus yields
        # no operand.
        opcode, _, operand_text = op_text.partition(': ')
        operations.append(tuple([opcode] + operand_text.split()))

    return Exprloc(raw_bytes, operations)

 

 

# Decoders for the textual attribute values, indexed by form name. Each one
# is called as `decoder(die, name, form, offset, value)`.
value_decoders = {
    'DW_FORM_flag_present': _decode_flag_present,
    'DW_FORM_flag': _decode_flag,

    'DW_FORM_addr': _decode_data,
    'DW_FORM_sec_offset': _decode_data,
    'DW_FORM_data1': _decode_data,
    'DW_FORM_data2': _decode_data,
    'DW_FORM_data4': _decode_data,
    'DW_FORM_data8': _decode_data,
    'DW_FORM_sdata': _decode_data,
    'DW_FORM_udata': _decode_data,

    'DW_FORM_ref4': _decode_ref,
    'DW_FORM_ref8': _decode_ref,

    'DW_FORM_strp': _decode_indirect_string,

    'DW_FORM_block': _decode_block,
    'DW_FORM_block1': _decode_block,
    'DW_FORM_block2': _decode_block,
    'DW_FORM_block4': _decode_block,
    # This key used to be listed twice; one entry is kept so that lookups
    # behave as before.
    'DW_FORM_block8': _decode_block,
    'DW_FORM_exprloc': _decode_exprloc,

    # TODO: handle all existing forms
}