Thursday, 28 May 2009

A really simple tracing debugger: part 2

In my last post I showed how to get a readable execution trace of a process using a simple ptrace()-based tracer (itrace) and binutils' addr2line. By doing a bit more work we can extract the function call tree from the trace. In order to distinguish callers and callees, we can use the stack pointer (%esp), which itrace outputs.

Here's the call tree for a "hello world" program using glibc 2.9 on i386, statically linked:

$ ./itrace ./hellow-static | python treeify.py ./hellow-static
_start ??:0 8048130
  __libc_start_main glibc/csu/libc-start.c:93 8048220
    _dl_aux_init glibc/elf/dl-support.c:165 80518e0
    _dl_discover_osversion glibc/elf/../sysdeps/unix/sysv/linux/dl-sysdep.c:67 80521e0
      __uname ??:0 806e160
    __pthread_initialize_minimal glibc/csu/libc-tls.c:247 8048750
      __libc_setup_tls glibc/csu/libc-tls.c:111 8048550
        __sbrk glibc/misc/sbrk.c:34 80505d0
          __brk glibc/misc/../sysdeps/unix/sysv/linux/i386/brk.c:36 806f9c0
          __brk glibc/misc/../sysdeps/unix/sysv/linux/i386/brk.c:36 806f9c0
        memcpy glibc/string/memcpy.c:33 804fc10
      init_slotinfo glibc/csu/libc-tls.c:83 80486cb
        init_static_tls glibc/csu/libc-tls.c:100 8048708
  _dl_setup_stack_chk_guard glibc/csu/../sysdeps/unix/sysv/linux/dl-osinfo.h:76 8048283
    __libc_init_first glibc/csu/../sysdeps/unix/sysv/linux/init-first.c:44 80522f0
      __setfpucw glibc/math/../sysdeps/i386/setfpucw.c:29 8056270
      __libc_init_secure glibc/elf/enbl-secure.c:34 8052190
      _dl_non_dynamic_init glibc/elf/dl-support.c:235 8051b50
        getenv glibc/stdlib/getenv.c:36 8056ea0
          strlen glibc/string/../sysdeps/i386/i486/strlen.S:34 804f9f0
        getenv glibc/stdlib/getenv.c:36 8056ea0
          strlen glibc/string/../sysdeps/i386/i486/strlen.S:34 804f9f0
        _dl_init_paths glibc/elf/dl-load.c:630 8072ae0
          _dl_important_hwcaps glibc/elf/dl-support.c:306 8051b20
          __libc_malloc glibc/malloc/malloc.c:3540 804eb60
            malloc_hook_ini glibc/malloc/hooks.c:35 804f360
              ptmalloc_init glibc/malloc/arena.c:426 804b370
                ptmalloc_init_minimal glibc/malloc/arena.c:374 804b39f
                  __getpagesize glibc/misc/../sysdeps/unix/sysv/linux/getpagesize.c:29 8050650
                  __linkin_atfork glibc/nptl/../nptl/sysdeps/unix/sysv/linux/register-atfork.c:115 80513a0
                next_env_entry glibc/malloc/arena.c:344 804b45d
            __libc_malloc glibc/malloc/malloc.c:3540 804eb60
              _int_malloc glibc/malloc/malloc.c:4130 804cae0
                malloc_consolidate glibc/malloc/malloc.c:4835 804afd0
                malloc_init_state glibc/malloc/malloc.c:2412 804a8e0
              sYSMALLOc glibc/malloc/malloc.c:2929 804d200
                __default_morecore glibc/malloc/morecore.c:48 804f9c0
                  __sbrk glibc/misc/sbrk.c:34 80505d0
                    __brk glibc/misc/../sysdeps/unix/sysv/linux/i386/brk.c:36 806f9c0
                __default_morecore glibc/malloc/morecore.c:48 804f9c0
                  __sbrk glibc/misc/sbrk.c:34 80505d0
                    __brk glibc/misc/../sysdeps/unix/sysv/linux/i386/brk.c:36 806f9c0
          __libc_malloc glibc/malloc/malloc.c:3540 804eb60
            _int_malloc glibc/malloc/malloc.c:4130 804cae0
        getenv glibc/stdlib/getenv.c:36 8056ea0
          strlen glibc/string/../sysdeps/i386/i486/strlen.S:34 804f9f0
        getenv glibc/stdlib/getenv.c:36 8056ea0
          strlen glibc/string/../sysdeps/i386/i486/strlen.S:34 804f9f0
        getenv glibc/stdlib/getenv.c:36 8056ea0
          strlen glibc/string/../sysdeps/i386/i486/strlen.S:34 804f9f0
        getenv glibc/stdlib/getenv.c:36 8056ea0
          strlen glibc/string/../sysdeps/i386/i486/strlen.S:34 804f9f0
      dl_platform_init glibc/elf/../sysdeps/i386/dl-machine.h:273 8051c38
        getenv glibc/stdlib/getenv.c:36 8056ea0
          strlen glibc/string/../sysdeps/i386/i486/strlen.S:34 804f9f0
    __init_misc glibc/misc/init-misc.c:31 8051120
      rindex glibc/string/../sysdeps/i386/strrchr.S:37 8067a30
    __cxa_atexit glibc/stdlib/cxa_atexit.c:34 8048ab0
      __new_exitfn glibc/stdlib/cxa_atexit.c:63 8048960
    __libc_csu_init glibc/csu/elf-init.c:65 80487b0
      _init /build/buildd/glibc-2.7/build-tree/amd64-i386/csu/crti.S:15 80480f4
        _init ??:0 8048116
          frame_dummy crtstuff.c:0 80481b0
            __register_frame_info ??:0 80a2b50
              __register_frame_info_bases ??:0 80a2ab0
                __i686.get_pc_thunk.bx ??:0 80a29f9
          __do_global_ctors_aux crtstuff.c:0 80a4b10
        _init /build/buildd/glibc-2.7/build-tree/amd64-i386/csu/crtn.S:10 8048120
    _setjmp glibc/setjmp/../sysdeps/i386/bsd-_setjmp.S:36 8048830
    main ??:0 80481f0
      _IO_puts glibc/libio/ioputs.c:34 8048b10
        strlen glibc/string/../sysdeps/i386/i486/strlen.S:34 804f9f0
        _IO_new_file_xsputn glibc/libio/fileops.c:1288 8065870
          _IO_new_file_overflow glibc/libio/fileops.c:829 80661a0
            _IO_doallocbuf glibc/libio/genops.c:419 8049780
              _IO_file_doallocate glibc/libio/filedoalloc.c:88 808bd50
                _IO_file_stat glibc/libio/fileops.c:1225 8065b90
                  ___fxstat64 glibc/io/../sysdeps/unix/sysv/linux/fxstat64.c:46 804fe20
                mmap glibc/misc/../sysdeps/unix/sysv/linux/i386/mmap.S:81 8051080
                _IO_setb glibc/libio/genops.c:404 8049670
            _IO_new_do_write glibc/libio/fileops.c:493 8065a50
          _IO_default_xsputn glibc/libio/genops.c:452 8049850
      _IO_acquire_lock_fct glibc/libio/libioP.h:968 8048b8d
    exit glibc/stdlib/exit.c:34 8048870
      __libc_csu_fini glibc/csu/elf-init.c:91 8048770
      _fini /build/buildd/glibc-2.7/build-tree/amd64-i386/csu/crti.S:41 80a5574
        _fini ??:0 80a5587
          __do_global_dtors_aux crtstuff.c:0 8048160
            fini glibc/dlfcn/dlerror.c:207 80975e0
              check_free glibc/dlfcn/dlerror.c:188 8097560
            __deregister_frame_info ??:0 80a3c30
            __deregister_frame_info_bases ??:0 80a3b20
              __i686.get_pc_thunk.bx ??:0 80a29f9
        _fini /build/buildd/glibc-2.7/build-tree/amd64-i386/csu/crtn.S:21 80a558c
      _IO_cleanup glibc/libio/genops.c:1007 8049fd0
        _IO_flush_all_lockp glibc/libio/genops.c:823 8049db0
          _IO_new_file_overflow glibc/libio/fileops.c:829 80661a0
            _IO_new_do_write glibc/libio/fileops.c:493 8065a50
              new_do_write glibc/libio/fileops.c:505 8065760
                _IO_new_file_write glibc/libio/fileops.c:1261 8065a80
                  ?? ??:0 8
                __libc_write ??:0 804ffa0
                  __write_nocancel ??:0 804ffaa
      _IO_unbuffer_write glibc/libio/genops.c:951 8049fe8
        _IO_new_file_setbuf glibc/libio/fileops.c:445 8066380
          _IO_default_setbuf glibc/libio/genops.c:562 80496d0
            _IO_new_file_sync glibc/libio/fileops.c:891 80660d0
            _IO_setb glibc/libio/genops.c:404 8049670
      _exit glibc/posix/../sysdeps/unix/sysv/linux/i386/_exit.S:25 804fdc0
Believe it or not, this trace is quite illuminating. Working out that these calls will happen by reading the glibc source could take a long time!

The Python script below (treeify.py) does two things:

  • Firstly, it filters the trace to include only function entry points. We can just about infer function entry points from addr2line's output. We assume that when a function name (plus filename and line number) appears for the first time in the trace, the current address is the function's entry point. This doesn't work fully when inline functions are instantiated multiple times though. We could use nm to find symbol addresses, but addr2line gives us source filenames.
  • Secondly, it works out the nesting of the call tree by looking at the stack pointer.
import subprocess
import sys

def read():
    for line in sys.stdin:
        try:
            regs = [int(x, 16) for x in line.split(" ")]
            yield {"eip": regs[0], "esp": regs[1]}
        # Ignore lines interspersed with other output!
        except (ValueError, IndexError):
            pass

def addr2line(iterable):
    proc = subprocess.Popen(["addr2line", "-e", sys.argv[1], "-f"],
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    for regs in iterable:
        proc.stdin.write("%x\n" % regs["eip"])
        a = proc.stdout.readline().rstrip("\n")
        b = proc.stdout.readline().rstrip("\n")
        regs["func"] = "%s %s" % (a, b)
        yield regs

def entry_points(iterable):
    funcs = {}
    # We treat the first address we see for the function as its entry
    # point, and only report those entries from this point on.
    for regs in iterable:
        func = regs["func"].split(":")[0]
        if funcs.setdefault(func, regs["eip"]) == regs["eip"]:
            yield regs

def add_nesting(iterable):
    stack = [2 ** 64]
    for regs in iterable:
        stack_pos = regs["esp"]
        if stack_pos < stack[-1]:
            stack.append(stack_pos)
        while stack_pos > stack[-1]:
            stack.pop()
        regs["indent"] = "  " * len(stack)
        yield regs

for x in add_nesting(entry_points(addr2line(read()))):
    print x["indent"], x["func"], "%x" % x["eip"]

No comments: