I'm not very familiar with Python, and I am just discovering GDB python scripting capabilities; the motivation of my question is to enhance the GDB printing of values inside the MELT monitor which will later be connected to GCC MELT. But here is a simpler variant.
My system is Linux/Debian/Sid/x86-64. The GCC compiler is 4.8.2; the GDB debugger is 7.6.2; its Python is 3.3.
I want to debug a C program with a "discriminated union" type:
// file tiny.c in the public domain by Basile Starynkevitch
// compile with gcc -g3 -Wall -std=c99 tiny.c -o tiny
// debug with gdb tiny
// under gdb: source tiny-gdb.py
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
// A value is one pointer, viewable through any member of union my_un.
typedef union my_un myval_t;

// Discriminant stored as the first member of every boxed structure below.
enum tag_en {
  tag_none,
  tag_int,
  tag_string,
  tag_sequence
};

// Forward declarations so that union my_un can point to the boxed types.
struct boxint_st;
struct boxstring_st;
struct boxsequence_st;

// All members alias the same pointer; ptag views the pointed-to object as
// its leading tag, which every boxed structure declares first.
union my_un {
  void* ptr;
  enum tag_en *ptag;
  struct boxint_st *pint;
  struct boxstring_st *pstr;
  struct boxsequence_st *pseq;
};

// Boxed integer.
struct boxint_st {
  enum tag_en tag; // for tag_int
  int ival;
};

// Boxed string; the characters are stored inline after the tag.
struct boxstring_st {
  enum tag_en tag; // for tag_string
  char strval[]; // a zero-terminated C string
};

// Boxed sequence of values; the elements are stored inline after slen.
struct boxsequence_st {
  enum tag_en tag; // for tag_sequence
  unsigned slen;
  myval_t valtab[]; // of length slen
};
// Build a boxed int, a boxed string and a 3-element sequence holding both
// (plus one NULL slot), then print a marker line to break on under gdb.
// Returns EXIT_FAILURE if any allocation fails, EXIT_SUCCESS otherwise.
int main (int argc, char **argv) {
  printf ("start %s, argc=%d\n", argv[0], argc);

  struct boxint_st *iv42 = malloc (sizeof (struct boxint_st));
  if (iv42 == NULL) {
    perror ("malloc");
    return EXIT_FAILURE;
  }
  iv42->tag = tag_int;
  iv42->ival = 42;

  // sizeof ("hello") already counts the terminating NUL byte.
  struct boxstring_st *istrhello =
    malloc (sizeof (struct boxstring_st) + sizeof ("hello"));
  if (istrhello == NULL) {
    perror ("malloc");
    return EXIT_FAILURE;
  }
  istrhello->tag = tag_string;
  strcpy (istrhello->strval, "hello");

  struct boxsequence_st *iseq3 =
    malloc (sizeof (struct boxsequence_st) + 3 * sizeof (myval_t));
  if (iseq3 == NULL) {
    perror ("malloc");
    return EXIT_FAILURE;
  }
  iseq3->tag = tag_sequence;
  iseq3->slen = 3;
  // Assign through the union members instead of the GNU cast-to-union
  // extension, so the program is standard C99.
  iseq3->valtab[0].pint = iv42;
  iseq3->valtab[1].pstr = istrhello;
  iseq3->valtab[2].ptr = NULL;

  printf ("before %s:%d gdb print iseq3\n", __FILE__, __LINE__);
  // The boxes are deliberately not freed: they must stay inspectable from
  // the debugger until the process exits.
  return EXIT_SUCCESS;
}
Here is my Python file to be read under gdb
# file tiny-gdb.py in the public domain by Basile Starynkevitch
## see also tiny.c file
class my_val_Printer:
    """Pretty-print a ``union my_un`` value (a tagged pointer)."""

    def __init__(self, val):
        """Remember *val*, the ``gdb.Value`` to be printed."""
        self.val = val

    def to_string(self):
        """Return ``my_val@ADDRESS``, followed by the tag name if there is one.

        The tag is read through the ``ptag`` member. It is omitted for a null
        pointer (nothing to read) and for a zero tag value.
        """
        ptr = self.val['ptr']
        # gdb.Value does not concatenate with str; convert explicitly.
        outs = "my_val@" + str(ptr)
        if ptr == 0:
            # Dereferencing a null pointer would raise a memory error.
            return outs
        mytag = self.val['ptag'].dereference()
        if mytag:
            outs = outs + " " + str(mytag)
        return outs

    def display_hint(self):
        """Return the display hint reported for this printer."""
        return 'my_val'
def my_val_lookup(val):
    """Return a my_val_Printer for ``union my_un`` values, otherwise None.

    Typedefs and qualifiers are stripped before the tag name is inspected, so
    a value declared through a typedef of the union is recognised too. Types
    without a tag name (their ``tag`` is None) never match.
    """
    lookup = val.type.strip_typedefs().unqualified().tag
    if lookup == "my_un":
        return my_val_Printer(val)
    return None
I'm stuck with the following basic questions.
- How can `union my_un` and its typedef-ed synonym `myval_t` be pretty-printed the same way?
- How can the pretty-printer recurse into `struct boxsequence_st`? This means detecting that the pointer is non-nil, then dereferencing its `ptag`, comparing that tag to `tag_sequence`, and pretty-printing the `valtab` flexible array member.

I don't have enough experience with the gdb Python API to call this an answer; I consider this just some research notes from a fellow developer. My code attached below is quite crude and ugly, too. However, this does work with gdb-7.4 and python-2.7.3. An example debugging run:
$ gcc -Wall -g3 tiny.c -o tiny
$ gdb tiny
(gdb) b 58
(gdb) run
(gdb) print iseq3
$1 = (struct boxsequence_st *) 0x602050
(gdb) print iv42
$2 = (struct boxint_st *) 0x602010
(gdb) print istrhello
$3 = (struct boxstring_st *) 0x602030
All of the above are bog-standard pretty-printed outputs -- my reasoning is that I often want to see what the pointers are, so I didn't want to override those. However, dereferencing the pointers uses the pretty-printer shown further below:
(gdb) print *iseq3
$4 = (struct boxsequence_st)(3) = {(struct boxint_st)42, (struct boxstring_st)"hello"(5), NULL}
(gdb) print *iv42
$5 = (struct boxint_st)42
(gdb) print *istrhello
$6 = (struct boxstring_st)"hello"(5)
(gdb) set print array
(gdb) print *iseq3
$7 = (struct boxsequence_st)(3) = {
(struct boxint_st)42,
(struct boxstring_st)"hello"(5),
NULL
}
(gdb) info auto-load
Loaded Script
Yes /home/.../tiny-gdb.py
The last line shows that when debugging tiny, the tiny-gdb.py script in the same directory gets loaded automatically (although you can disable this, I do believe this is the default behaviour).
The tiny-gdb.py file used for the above:
def deref(reference):
    """Return what pointer *reference* points to, or the string 'NULL'.

    The pointer is compared with zero directly rather than through the text
    form of its address, and it is only dereferenced when it is not null.
    """
    if reference == 0:
        return 'NULL'
    return reference.dereference()
class cstringprinter:
    """Pretty-printer for a NUL-terminated ``char []`` value."""

    def __init__(self, value, maxlen=4096):
        """Read the bytes of the string stored at *value*'s address.

        The inferior's memory is searched for a NUL byte within *maxlen*
        bytes. ``self.size`` is the string length as text, or ``'<maxlen>+'``
        when no terminator was found. ``self.data`` holds the bytes read, or
        None if anything went wrong (best effort: a printer must not abort
        the surrounding print command).
        """
        self.data = None
        self.size = None
        try:
            inferior = gdb.selected_inferior()
            ends = inferior.search_memory(value.address, maxlen, b'\0')
            if ends is not None:
                # Parsing the text form of the address also works where a
                # pointer gdb.Value cannot be converted with int().
                maxlen = ends - int(str(value.address), 16)
                self.size = str(maxlen)
            else:
                self.size = '%s+' % str(maxlen)
            if maxlen == 0:
                # Empty string: nothing to read from the inferior.
                self.data = bytearray()
            else:
                self.data = bytearray(inferior.read_memory(value.address, maxlen))
        except Exception:
            self.data = None

    @staticmethod
    def _escape(data):
        """Return the bytes in *data* as text safe inside double quotes.

        Iterating a bytearray yields integers on Python 2 and 3 alike, so no
        version-specific codec is needed. Apostrophes are left untouched.
        """
        named = {0x5c: '\\\\', 0x22: '\\"', 0x09: '\\t', 0x0a: '\\n', 0x0d: '\\r'}
        pieces = []
        for byte in data:
            if byte in named:
                pieces.append(named[byte])
            elif 0x20 <= byte < 0x7f:
                pieces.append(chr(byte))
            else:
                pieces.append('\\x%02x' % byte)
        return ''.join(pieces)

    def to_string(self):
        """Return ``"escaped text"(size)``, or ``NULL`` if nothing was read."""
        if self.data is None:
            return 'NULL'
        return '"%s"(%s)' % (self._escape(self.data), self.size)
class boxintprinter:
    """Pretty-printer showing the integer held in a ``struct boxint_st``."""

    def __init__(self, value):
        """Keep *value* cast to ``struct boxint_st``."""
        boxint_type = gdb.lookup_type('struct boxint_st')
        self.value = value.cast(boxint_type)

    def to_string(self):
        """Return ``(struct boxint_st)`` followed by the ``ival`` member."""
        ival_text = str(self.value['ival'])
        return '(struct boxint_st)%s' % ival_text
class boxstringprinter:
    """Pretty-printer showing the text held in a ``struct boxstring_st``."""

    def __init__(self, value):
        """Keep *value* cast to ``struct boxstring_st``."""
        boxstring_type = gdb.lookup_type('struct boxstring_st')
        self.value = value.cast(boxstring_type)

    def to_string(self):
        """Return ``(struct boxstring_st)`` followed by the ``strval`` member."""
        payload = self.value['strval']
        return '(struct boxstring_st)%s' % (payload)
class boxsequenceprinter:
    """Pretty-printer listing the elements of a ``struct boxsequence_st``."""

    # Union member used to reach each element, keyed by the tag name of the
    # sequence structure itself.
    _MEMBER_BY_TAG = {
        'tag_none': 'ptag',
        'tag_int': 'pint',
        'tag_string': 'pstr',
        'tag_sequence': 'pseq',
    }

    def __init__(self, value):
        """Keep *value* cast to ``struct boxsequence_st``."""
        self.value = value.cast(gdb.lookup_type('struct boxsequence_st'))

    def display_hint(self):
        """Tell GDB to lay the children out like an array."""
        return 'array'

    def to_string(self):
        """Return ``(struct boxsequence_st)(N)`` where N is ``slen``."""
        return '(struct boxsequence_st)(%s)' % str(self.value['slen'])

    def children(self):
        """Return a list of ``('#index', element)`` pairs for ``valtab``.

        Each element is what the slot's pointer points to, or the string
        'NULL' for a null slot. An unrecognised tag yields an empty list.
        """
        value = self.value
        count = int(value['slen'])
        member = self._MEMBER_BY_TAG.get(str(value['tag']))
        if member is None:
            return []
        valtab = value['valtab']
        # range() exists on Python 2 and 3 alike, unlike xrange().
        return [('#%d' % i, deref(valtab[i][member])) for i in range(count)]
def typefilter(value):
    """Choose the pretty-printer for *value*, or return None if none fits.

    ``char []`` values get a cstringprinter. Values of the three box
    structure types are dispatched on the content of their ``tag`` field,
    not on their declared type.
    """
    bare_type = value.type.strip_typedefs().unqualified()
    name = str(bare_type)
    if name == 'char []':
        return cstringprinter(value)

    box_types = ('struct boxint_st', 'struct boxstring_st', 'struct boxsequence_st')
    if name not in box_types:
        return None

    kind = str(value['tag'])
    if kind == 'tag_int':
        return boxintprinter(value)
    elif kind == 'tag_string':
        return boxstringprinter(value)
    elif kind == 'tag_sequence':
        return boxsequenceprinter(value)
    return None
# Register typefilter in GDB's global list of pretty-printer lookup functions.
gdb.pretty_printers.append(typefilter)
The reasoning behind my choices is as follows:
How to install pretty-printers to gdb?
There are two parts to this question: where to install the Python files, and how to hook the pretty-printers to gdb.
Because the pretty-printer selection cannot rely on the inferred type alone, but has to peek into the actual data fields, you cannot use the regular expression matching functions. Instead, I chose to add my own pretty-printer selector function, typefilter()
, to the global pretty-printers list, as described in the documentation. I did not implement the enable/disable functionality, because I believe it is easier to just load/not load the relevant Python script instead.
(typefilter()
gets called once per every variable reference, unless some other pretty-printer has already accepted it.)
The file location issue is a more complicated one. For application-specific pretty-printers, putting them into a single Python script file sounds sensible, but for a library, some splitting seems to be in order. The documentation recommends packaging the functions into a Python module, so that a simple python import module enables the pretty-printer. Fortunately, Python packaging is quite straightforward. If you were to add import gdb to the top of the script and save it to /usr/lib/pythonX.Y/tiny.py, where X.Y is the Python version used, you only need to run python import tiny in gdb to enable the pretty-printer.
Of course, properly packaging the pretty-printer is a very good idea, especially if you intend to distribute it, but it does pretty much boil down to adding some variables et cetera to the beginning of the script, assuming you keep it as a single file. For more complex pretty-printers, using a directory layout might be a good idea.
If you have a value val
, then val.type
is the gdb.Type object describing its type; converting it to string yields a human-readable type name.
val.type.strip_typedefs()
yields the actual type with all typedefs stripped. I even added .unqualified()
, so that all const/volatile/etc. type qualifiers are removed.
NULL pointer detection is a bit tricky.
The best way I found was to examine the stringified .address member of the target gdb.Value object, and see if it is "0x0".
To make life easier, I was able to write a simple deref()
function, which tries to dereference a pointer. If the target points to (void *)0, it returns the string "NULL"
, otherwise it returns the target gdb.Value object.
The way I use deref()
is based on the fact that "array"
type pretty-printers yield a list of 2-tuples, where the first item is the name string, and the second item is either a gdb.Value object, or a string. This list is returned by the children()
method of the pretty-printer object.
Handling "discriminated union" types would be much easier, if you had a separate type for the generic entity. That is, if you had
// Generic box: only the tag is known.
struct box_st {
  enum tag_en tag;
};
and it was used everywhere when the tag
value is still uncertain; and the specific structure types only used where their tag
value is fixed. This would allow a much simpler type inference.
As it is, in tiny.c
the struct box*_st
types can be used interchangeably. (Or, more specifically, we cannot rely on a specific tag value based on the type alone.)
The sequence case is actually quite simple, because valtab[]
can be treated as simply as an array of void pointers. The sequence tag is used to pick the correct union member. In fact, if valtab[] was simply a void pointer array, then gdb.Value.cast(gdb.lookup_type()) or gdb.Value.reinterpret_cast(gdb.lookup_type()) can be used to change each pointer type as necessary, just like I do for the boxed structure types.
Recursion limits?
You can use the @
operator in print
command to specify how many elements are printed, but that does not help with nesting.
If you add iseq3->valtab[2] = (myval_t)iseq3;
to tiny.c
, you get an infinitely recursive sequence. gdb does print it nicely, especially with set print array
, but it does not notice or care about the recursion.
In my opinion, you might wish to write a gdb command in addition to a pretty-printer for deeply nested or recursive data structures. During my testing, I wrote a command that uses Graphviz to draw binary tree structures directly from within gdb; I'm absolutely convinced it beats plain text output.
Added: If you save the following as /usr/lib/pythonX.Y/tree.py
:
import subprocess
import gdb
def _dot_escape(data):
    """Return the bytes in *data* as text for a double-quoted DOT label."""
    named = {0x5c: '\\\\', 0x22: '\\"', 0x09: '\\t', 0x0a: '\\n', 0x0d: '\\r'}
    pieces = []
    # Iterating a bytearray yields integers on Python 2 and 3 alike.
    for byte in data:
        if byte in named:
            pieces.append(named[byte])
        elif 0x20 <= byte < 0x7f:
            pieces.append(chr(byte))
        else:
            pieces.append('\\x%02x' % byte)
    return ''.join(pieces)


def pretty(value, field, otherwise=''):
    """Return display text for ``value[field]``, or *otherwise* on failure.

    A ``char []`` member is read from the inferior (at most 64 bytes) and
    returned between escaped double quotes. ``..`` is appended when no NUL
    terminator was found in those bytes. Any other member is returned as
    ``str(member)``. A missing field or unreadable memory yields *otherwise*.
    """
    try:
        member = value[field]
        if str(member.type) != 'char []':
            return str(member)
        raw = bytearray(gdb.selected_inferior().read_memory(member.address, 64))
    except Exception:
        return otherwise
    end = raw.find(b'\0')
    if end < 0:
        return '\\"%s\\"..' % _dot_escape(raw)
    return '\\"%s\\"' % _dot_escape(raw[:end])
class tee:
    """Callable that forwards each chunk to *cmd* and copies it to a file."""

    def __init__(self, cmd, filename):
        """Open *filename* for writing and remember the *cmd* callable."""
        self.file = open(filename, 'wb')
        gdb.write("Saving DOT to '%s'.\n" % filename)
        self.cmd = cmd

    def close(self):
        """Flush and close the copy file; safe to call more than once."""
        # getattr: the attribute is missing if open() failed in __init__.
        handle = getattr(self, 'file', None)
        if handle is not None:
            self.file = None
            handle.flush()
            handle.close()

    def __del__(self):
        self.close()

    def __call__(self, arg):
        """Pass *arg* to the wrapped callable, then append it to the file."""
        self.cmd(arg)
        if self.file is not None:
            # The file is binary: text must be encoded before it is written.
            data = arg if isinstance(arg, bytes) else arg.encode('utf-8')
            self.file.write(data)
def do_dot(value, output, visited, source, leg, label, left, right):
    """Emit DOT statements for the node *value* points to and its children.

    value   -- gdb.Value; anything but a non-null pointer is ignored
    output  -- callable receiving each DOT statement as a string
    visited -- set of node addresses and 'source.target' edge keys already
               emitted; updated in place, which ends recursion on cycles
    source  -- address of the parent node, or None for the root
    leg     -- member name used as the tail label of the incoming edge
    label   -- None (label nodes by address), one member name, or several
               comma-separated member names (record-shaped node)
    left, right -- member names of the child pointers, or None to skip
    """
    if value.type.code != gdb.TYPE_CODE_PTR:
        return
    target = value.dereference()
    target_addr = int(str(target.address), 16)
    if target_addr == 0:
        return

    seen_before = target_addr in visited
    visited.add(target_addr)

    # The incoming edge is drawn once per (source, target) pair, whether or
    # not the target node itself was already emitted.
    if source is not None:
        path = '%s.%s' % (source, target_addr)
        if path not in visited:
            visited.add(path)
            output('\t"%s" -> "%s" [ taillabel="%s" ];\n' % (source, target_addr, leg))
    if seen_before:
        return

    if label is None:
        output('\t"%s" [ label="%s" ];\n' % (target_addr, target_addr))
    elif "," in label:
        fields = [pretty(target, one, '') for one in label.split(",")]
        lab = '|'.join(cur for cur in fields if len(cur) > 0)
        output('\t"%s" [ shape=record, label="{%s}" ];\n' % (target_addr, lab))
    else:
        output('\t"%s" [ label="%s" ];\n' % (target_addr, pretty(target, label, target_addr)))

    for child in (left, right):
        if child is None:
            continue
        try:
            do_dot(target[child], output, visited, target_addr, child, label, left, right)
        except Exception:
            # Best effort: a missing member or unreadable subtree is skipped,
            # but KeyboardInterrupt still stops a long traversal.
            pass
class Tree(gdb.Command):
    """GDB command ``tree``: draw a linked structure by piping DOT to a tool."""

    def __init__(self):
        super(Tree, self).__init__('tree', gdb.COMMAND_DATA, gdb.COMPLETE_SYMBOL, False)

    def do_invoke(self, name, filename, left, right, label, cmd, arg):
        """Draw the structure rooted at variable *name*.

        name     -- variable looked up in the selected frame
        filename -- also save the DOT text to this file, unless None
        left, right, label -- member names handed to do_dot; an empty string
                    disables the corresponding feature
        cmd, arg -- program to run and one optional argument for it
        """
        try:
            node = gdb.selected_frame().read_var(name)
        except Exception:
            gdb.write('No symbol "%s" in current context.\n' % str(name))
            return
        if len(arg) < 1:
            cmdlist = [cmd]
        else:
            cmdlist = [cmd, arg]
        try:
            # Text-mode pipe: the DOT statements are written as str.
            sub = subprocess.Popen(cmdlist, bufsize=16384, stdin=subprocess.PIPE,
                                   stdout=None, stderr=None, universal_newlines=True)
        except OSError as err:
            gdb.write('Cannot run "%s": %s\n' % (cmd, err))
            return
        try:
            if filename is None:
                output = sub.stdin.write
            else:
                output = tee(sub.stdin.write, filename)
            output('digraph {\n')
            output('\ttitle = "%s";\n' % name)
            if len(label) < 1:
                label = None
            if len(left) < 1:
                left = None
            if len(right) < 1:
                right = None
            visited = set((0,))
            do_dot(node, output, visited, None, None, label, left, right)
            output('}\n')
        finally:
            # Closes the pipe and waits for the child, even after an error.
            sub.communicate()

    def help(self):
        """Write the usage text to the GDB console."""
        gdb.write('Usage: tree [OPTIONS] variable\n')
        gdb.write('Options:\n')
        gdb.write(' left=name Name member pointing to left child\n')
        gdb.write(' right=name Name right child pointer\n')
        gdb.write(' label=name[,name] Define node fields\n')
        gdb.write(' cmd=dot arg=-Tx11 Specify the command (and one option)\n')
        gdb.write(' dot=filename.dot Save .dot to a file\n')
        gdb.write('Suggestions:\n')
        gdb.write(' tree cmd=neato variable\n')

    def invoke(self, argument, from_tty):
        """Parse ``key=value`` options and draw every variable named.

        Options apply to the variables that follow them on the command line.
        The usage text is shown when no variable is given.
        """
        args = argument.split()
        if len(args) < 1:
            self.help()
            return
        num = 0
        cfg = {'left': 'left', 'right': 'right', 'label': 'value',
               'cmd': 'dot', 'arg': '-Tx11', 'dot': None}
        for arg in args:
            if '=' in arg:
                key, val = arg.split('=', 1)
                cfg[key] = val
            else:
                num += 1
                self.do_invoke(arg, cfg['dot'], cfg['left'], cfg['right'],
                               cfg['label'], cfg['cmd'], cfg['arg'])
        if num < 1:
            self.help()
# Instantiating the command class registers the 'tree' command with GDB.
Tree()
you can use it in gdb:
(gdb) python import tree
(gdb) tree
Usage: tree [OPTIONS] variable
Options:
left=name Name member pointing to left child
right=name Name right child pointer
label=name[,name] Define node fields
cmd=dot arg=-Tx11 Specify the command (and one option)
dot=filename.dot Save .dot to a file
Suggestions:
tree cmd=neato variable
If you have e.g.
// Example node: two child pointers, a key, and a flexible array member.
struct node {
  struct node *le;
  struct node *gt;
  long key;
  char val[];
};
struct node *sometree;
and you have X11 (local or remote) connection and Graphviz installed, you can use
(gdb) tree left=le right=gt label=key,val sometree
to view the tree structure. Because it retains a list of already visited nodes (as a Python set), it does not get fazed about recursive structures.
I probably should have cleaned my Python snippets before posting, but no matter. Please do consider these only initial testing versions; use at your own risk. :)
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With