gallivm: JIT symbol resolution with linux perf.

Details on docs/llvmpipe.html

Reviewed-by: Brian Paul <brianp@vmware.com>
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
This commit is contained in:
José Fonseca 2013-04-17 13:32:15 +01:00
parent 35ef27d485
commit b8f6858fcb
8 changed files with 372 additions and 86 deletions

251
bin/perf-annotate-jit Executable file
View File

@ -0,0 +1,251 @@
#!/usr/bin/env python
#
# Copyright 2012 VMware Inc
# Copyright 2008-2009 Jose Fonseca
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
"""Perf annotate for JIT code.
Linux `perf annotate` does not work with JIT code. This script takes the data
produced by the `perf script` command, plus the disassemblies output by gallivm
into /tmp/perf-XXXXX.map.asm and produces output similar to `perf annotate`.
See docs/llvmpipe.html for usage instructions.
The `perf script` output parser was derived from the gprof2dot.py script.
"""
import sys
import os.path
import re
import optparse
import subprocess
class Parser:
    """Abstract base shared by every parser in this script."""

    def __init__(self):
        """Nothing to set up at this level."""

    def parse(self):
        """Run the parser; concrete subclasses have to override this."""
        raise NotImplementedError
class LineParser(Parser):
    """Parser base for line-oriented input with one line of lookahead."""

    def __init__(self, file):
        Parser.__init__(self)
        self._file = file
        # The lookahead stays None until the first readline() call.
        self.__line = None
        self.__eof = False
        self.line_no = 0

    def readline(self):
        """Fetch the next line of the file into the lookahead slot."""
        raw = self._file.readline()
        if raw:
            self.line_no += 1
            self.__line = raw.rstrip('\r\n')
            return
        # End of input: expose an empty lookahead and raise the EOF flag.
        self.__line = ''
        self.__eof = True

    def lookahead(self):
        """Return the pending line without consuming it."""
        assert self.__line is not None
        return self.__line

    def consume(self):
        """Return the pending line and advance to the following one."""
        current = self.lookahead()
        self.readline()
        return current

    def eof(self):
        """Tell whether the underlying file has been exhausted."""
        assert self.__line is not None
        return self.__eof
# Path of the map file most recently handed to lookupMap().  PerfParser.parse
# reads it to locate the matching '<map>.asm' disassembly.
mapFile = None

# Parsed map files keyed by path, so that each file is read only once no
# matter how many samples refer to it.
_mapCache = {}


def lookupMap(filename, matchSymbol):
    """Return the start address of matchSymbol in a perf map file.

    Each line of the map file is expected to read
    '<start-hex> <length-hex> <symbol>'.  Blank or malformed lines are
    skipped.  When a symbol is listed more than once the first entry wins.

    As a side effect the module-level `mapFile` is set to `filename`.

    Returns the start address as an int, or None when the symbol is not
    listed.  Errors from opening the file propagate to the caller.
    """
    global mapFile
    mapFile = filename

    table = _mapCache.get(filename)
    if table is None:
        table = {}
        with open(filename, 'rt') as stream:
            for line in stream:
                # Split at most twice so the symbol keeps any inner spaces.
                fields = line.split(None, 2)
                if len(fields) != 3:
                    continue
                start, _length, symbol = fields
                try:
                    start = int(start, 16)
                except ValueError:
                    continue
                table.setdefault(symbol.strip(), start)
        _mapCache[filename] = table

    return table.get(matchSymbol)
def lookupAsm(filename, desiredFunction):
    """Read the disassembly of desiredFunction from '<filename>.asm'.

    The listing starts after a '<desiredFunction>:' label line and runs up
    to the next blank line or the end of the file.  Every listing line is
    expected to read '<decimal offset>:<instruction text>'.

    Returns a list of (offset, instruction) tuples in file order.

    Raises LookupError when the label is not present in the file; the
    previous implementation spun forever in that case.
    """
    label = desiredFunction + ':'
    asm = []
    with open(filename + '.asm', 'rt') as stream:
        # A single iterator is shared by both loops so the second one
        # resumes right after the label line.
        lines = iter(stream)
        for line in lines:
            if line.rstrip('\r\n') == label:
                break
        else:
            raise LookupError('no disassembly for %s in %s.asm'
                              % (desiredFunction, filename))

        for line in lines:
            line = line.strip()
            if not line:
                break
            addr, instr = line.split(':', 1)
            asm.append((int(addr), instr))
    return asm
# Number of samples per instruction, keyed by the offset of the sampled
# address from the start of the requested symbol.
samples = {}


class PerfParser(LineParser):
    """Parser for linux perf callgraph output.

    It expects output generated with

        perf record -g
        perf script

    Only the innermost frame of each callchain is attributed to the
    requested symbol; the remaining frames are consumed and ignored.
    """

    call_re = re.compile(r'^\s+(?P<address>[0-9a-fA-F]+)\s+(?P<symbol>.*)\s+\((?P<module>[^)]*)\)$')

    def __init__(self, infile, symbol):
        LineParser.__init__(self, infile)
        self.symbol = symbol

    def readline(self):
        # Override LineParser.readline to ignore comment lines
        while True:
            LineParser.readline(self)
            if self.eof() or not self.lookahead().startswith('#'):
                break

    def parse(self):
        """Consume the whole input, then print the annotated disassembly.

        Every listing line is prefixed with the number of samples that hit
        that instruction (blank when there were none), followed by a
        'total:' line.  Exits with an error message when no sample matched
        the symbol, since there is then no map file to read the disassembly
        from.
        """
        # read lookahead
        self.readline()
        while not self.eof():
            self.parse_event()

        # mapFile is only set once lookupMap() has run for a matching frame.
        if mapFile is None:
            sys.exit('error: no samples found for symbol %s' % self.symbol)

        asm = lookupAsm(mapFile, self.symbol)

        total_samples = 0
        sys.stdout.write('%s:\n' % self.symbol)
        for address, instr in asm:
            sample = samples.pop(address, None)
            if sample is None:
                sys.stdout.write(6*' ')
            else:
                sys.stdout.write('%6u' % sample)
                total_samples += sample
            sys.stdout.write('%6u: %s\n' % (address, instr))
        sys.stdout.write('total: %s\n' % total_samples)

        # Whatever is left did not land on a listed instruction.
        if samples:
            sys.stderr.write(
                'warning: %u samples at %u addresses outside the disassembly\n'
                % (sum(samples.values()), len(samples)))

    def parse_event(self):
        """Consume one event: a header line followed by its callchain."""
        if self.eof():
            return

        # Nothing in the header is needed; a stray blank line is skipped.
        line = self.consume()
        if not line:
            return

        self.parse_callchain()

    def parse_callchain(self):
        """Consume every frame up to the blank line that ends the event.

        Returns the list of non-None parse_call() results.
        """
        callchain = []
        first = True
        while self.lookahead():
            function = self.parse_call(first)
            first = False
            if function is not None:
                callchain.append(function)
        # The loop stops on an empty lookahead: the separator line or EOF.
        self.consume()
        return callchain

    def parse_call(self, first):
        """Consume one callchain frame and record it when it is a hit.

        A sample is recorded only when `first` is true, the line has the
        expected frame syntax, the frame's symbol is the requested one and
        that symbol is listed in the frame's map file.

        Returns True when a sample was recorded, None otherwise.
        """
        line = self.consume()
        mo = self.call_re.match(line)
        if not mo:
            return None

        if not first:
            return None

        function_name = mo.group('symbol')
        if not function_name:
            function_name = mo.group('address')
        if function_name != self.symbol:
            return None

        module = mo.group('module')
        start_address = lookupMap(module, function_name)
        if start_address is None:
            return None

        address = int(mo.group('address'), 16) - start_address
        samples[address] = samples.get(address, 0) + 1
        return True
def main():
    """Main program.

    Takes exactly one argument, the name of the JIT symbol to annotate,
    runs `perf script` in the current directory and feeds its output to
    PerfParser.
    """
    optparser = optparse.OptionParser(
        usage="\n\t%prog [options] symbol_name")
    (options, args) = optparser.parse_args(sys.argv[1:])
    if len(args) != 1:
        optparser.error('wrong number of arguments')

    symbol = args[0]

    # stderr is inherited rather than piped: nothing ever drained that pipe,
    # so perf could block once it filled, and its messages were lost.
    # universal_newlines makes stdout yield text on every Python version.
    try:
        p = subprocess.Popen(['perf', 'script'],
                             stdout=subprocess.PIPE,
                             universal_newlines=True)
    except OSError as ex:
        sys.exit('error: failed to run `perf script`: %s' % ex)

    try:
        parser = PerfParser(p.stdout, symbol)
        parser.parse()
    finally:
        # Runs on SystemExit too, so the child is always reaped.
        p.stdout.close()
        p.wait()


if __name__ == '__main__':
    main()
# vim: set sw=4 et:

View File

@ -1601,13 +1601,6 @@ if test "x$enable_gallium_llvm" = xyes; then
LLVM_COMPONENTS="${LLVM_COMPONENTS} mcjit"
fi
if test "x$enable_debug" = xyes; then
# Debug builds require OProfileJIT if LLVM was built with support for it
if $LLVM_CONFIG --components | grep -q '\<oprofilejit\>'; then
LLVM_COMPONENTS="${LLVM_COMPONENTS} oprofilejit"
fi
fi
if test "x$enable_opencl" = xyes; then
LLVM_COMPONENTS="${LLVM_COMPONENTS} ipo linker instrumentation"
# LLVM 3.3 >= 177971 requires IRReader

View File

@ -130,38 +130,38 @@ need to ask, don't even try it.
<h1>Profiling</h1>
To profile llvmpipe you should pass the options
<p>
To profile llvmpipe you should build as
</p>
<pre>
scons build=profile &lt;same-as-before&gt;
</pre>
<p>
This will ensure that frame pointers are used both in C and JIT functions, and
that no tail call optimizations are done by gcc.
</p>
To better profile JIT code you'll need to build LLVM with oprofile integration.
<h2>Linux perf integration</h2>
<p>
On Linux, it is possible to have symbol resolution of JIT code with <a href="http://perf.wiki.kernel.org/">Linux perf</a>:
</p>
<pre>
./configure \
--prefix=$install_dir \
--enable-optimized \
--disable-profiling \
--enable-targets=host-only \
--with-oprofile
make -C "$build_dir"
make -C "$build_dir" install
find "$install_dir/lib" -iname '*.a' -print0 | xargs -0 strip --strip-debug
perf record -g /my/application
perf report
</pre>
Then you should define
<p>
When run inside Linux perf, llvmpipe will create a /tmp/perf-XXXXX.map file with a
symbol address table. It also dumps assembly code to /tmp/perf-XXXXX.map.asm,
which can be used by the bin/perf-annotate-jit script to produce disassembly of
the generated code annotated with the samples.
</p>
<pre>
export LLVM=/path/to/llvm-2.6-profile
</pre>
and rebuild.
<p>You can obtain a call graph via
<a href="http://code.google.com/p/jrfonseca/wiki/Gprof2Dot#linux_perf">Gprof2Dot</a>.</p>
<h1>Unit testing</h1>

View File

@ -31,6 +31,7 @@
#include <llvm/Target/TargetMachine.h>
#include <llvm/Target/TargetInstrInfo.h>
#include <llvm/Support/raw_ostream.h>
#include <llvm/Support/Format.h>
#include <llvm/Support/MemoryObject.h>
#if HAVE_LLVM >= 0x0300
@ -60,6 +61,11 @@
#include "lp_bld_debug.h"
#ifdef __linux__
#include <sys/stat.h>
#include <fcntl.h>
#endif
/**
@ -174,8 +180,8 @@ public:
* - http://blog.llvm.org/2010/01/x86-disassembler.html
* - http://blog.llvm.org/2010/04/intro-to-llvm-mc-project.html
*/
extern "C" void
lp_disassemble(const void* func)
static size_t
disassemble(const void* func, llvm::raw_ostream & Out)
{
#if HAVE_LLVM >= 0x0207
using namespace llvm;
@ -209,8 +215,8 @@ lp_disassemble(const void* func)
#endif
if (!AsmInfo) {
debug_printf("error: no assembly info for target %s\n", Triple.c_str());
return;
Out << "error: no assembly info for target " << Triple << "\n";
return 0;
}
#if HAVE_LLVM >= 0x0300
@ -220,12 +226,10 @@ lp_disassemble(const void* func)
OwningPtr<const MCDisassembler> DisAsm(T->createMCDisassembler());
#endif
if (!DisAsm) {
debug_printf("error: no disassembler for target %s\n", Triple.c_str());
return;
Out << "error: no disassembler for target " << Triple << "\n";
return 0;
}
raw_debug_ostream Out;
#if HAVE_LLVM >= 0x0300
unsigned int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
#else
@ -235,14 +239,14 @@ lp_disassemble(const void* func)
#if HAVE_LLVM >= 0x0301
OwningPtr<const MCRegisterInfo> MRI(T->createMCRegInfo(Triple));
if (!MRI) {
debug_printf("error: no register info for target %s\n", Triple.c_str());
return;
Out << "error: no register info for target " << Triple.c_str() << "\n";
return 0;
}
OwningPtr<const MCInstrInfo> MII(T->createMCInstrInfo());
if (!MII) {
debug_printf("error: no instruction info for target %s\n", Triple.c_str());
return;
Out << "error: no instruction info for target " << Triple.c_str() << "\n";
return 0;
}
#endif
@ -260,8 +264,8 @@ lp_disassemble(const void* func)
T->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, Out));
#endif
if (!Printer) {
debug_printf("error: no instruction printer for target %s\n", Triple.c_str());
return;
Out << "error: no instruction printer for target " << Triple.c_str() << "\n";
return 0;
}
#if HAVE_LLVM >= 0x0301
@ -300,7 +304,7 @@ lp_disassemble(const void* func)
* so that between runs.
*/
debug_printf("%6lu:\t", (unsigned long)pc);
Out << llvm::format("%6lu:\t", (unsigned long)pc);
if (!DisAsm->getInstruction(Inst, Size, memoryObject,
pc,
@ -309,7 +313,7 @@ lp_disassemble(const void* func)
#else
nulls())) {
#endif
debug_printf("invalid\n");
Out << "invalid";
pc += 1;
}
@ -320,25 +324,23 @@ lp_disassemble(const void* func)
if (0) {
unsigned i;
for (i = 0; i < Size; ++i) {
debug_printf("%02x ", ((const uint8_t*)bytes)[pc + i]);
Out << llvm::format("%02x ", ((const uint8_t*)bytes)[pc + i]);
}
for (; i < 16; ++i) {
debug_printf(" ");
Out << " ";
}
}
/*
* Print the instruction.
*/
#if HAVE_LLVM >= 0x0300
Printer->printInst(&Inst, Out, "");
Printer->printInst(&Inst, Out, "");
#elif HAVE_LLVM >= 0x208
Printer->printInst(&Inst, Out);
Printer->printInst(&Inst, Out);
#else
Printer->printInst(&Inst);
Printer->printInst(&Inst);
#endif
Out.flush();
/*
* Advance.
@ -386,7 +388,7 @@ lp_disassemble(const void* func)
* Output the address relative to the function start, given
* that MC will print the addresses relative the current pc.
*/
debug_printf("\t\t; %lu", (unsigned long)jump);
Out << "\t\t; " << jump;
/*
* Ignore far jumps given it could be actually a tail return to
@ -401,7 +403,7 @@ lp_disassemble(const void* func)
}
}
debug_printf("\n");
Out << "\n";
/*
* Stop disassembling on return statements, if there is no record of a
@ -420,12 +422,73 @@ lp_disassemble(const void* func)
*/
if (0) {
debug_printf("disassemble %p %p\n", bytes, bytes + pc);
_debug_printf("disassemble %p %p\n", bytes, bytes + pc);
}
debug_printf("\n");
Out << "\n";
Out.flush();
return pc;
#else /* HAVE_LLVM < 0x0207 */
(void)func;
return 0;
#endif /* HAVE_LLVM < 0x0207 */
}
/*
 * Disassemble the machine code at `code` to the debug output stream.
 *
 * `func` is accepted so the signature matches lp_profile(); it is not used.
 */
extern "C" void
lp_disassemble(LLVMValueRef func, const void *code)
{
   (void)func;

   raw_debug_ostream debug_stream;
   disassemble(code, debug_stream);
}
/*
* Linux perf profiler integration.
*
* See also:
* - http://penberg.blogspot.co.uk/2009/06/jato-has-profiler.html
* - https://github.com/penberg/jato/commit/73ad86847329d99d51b386f5aba692580d1f8fdc
* - http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;a=commitdiff;h=80d496be89ed7dede5abee5c057634e80a31c82d
*/
/*
 * Record a JIT-compiled function for Linux perf.
 *
 * Appends "<address> <size> <symbol>" to /tmp/perf-<pid>.map and the
 * function's disassembly, preceded by a "<symbol>:" label, to
 * /tmp/perf-<pid>.map.asm.  Both files are created on the first call, and
 * only when the PERF_BUILDID_DIR environment variable is set.  Does nothing
 * unless built for Linux with DEBUG or PROFILE defined.
 */
extern "C" void
lp_profile(LLVMValueRef func, const void *code)
{
#if defined(__linux__) && (defined(DEBUG) || defined(PROFILE))
   static boolean first_time = TRUE;
   static FILE *perf_map_file = NULL;
   static int perf_asm_fd = -1;

   if (first_time) {
      /*
       * We rely on the disassembler for determining a function's size, but
       * the disassembly is a leaky and slow operation, so avoid running
       * this except when running inside linux perf, which can be inferred
       * by the PERF_BUILDID_DIR environment variable.
       */
      if (getenv("PERF_BUILDID_DIR")) {
         pid_t pid = getpid();
         char filename[256];

         util_snprintf(filename, sizeof filename, "/tmp/perf-%llu.map", (unsigned long long)pid);
         perf_map_file = fopen(filename, "wt");

         if (perf_map_file) {
            util_snprintf(filename, sizeof filename, "/tmp/perf-%llu.map.asm", (unsigned long long)pid);
            mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
            /*
             * O_TRUNC: the map file above is truncated by "wt"; without it
             * a listing left by an earlier process with the same pid would
             * survive past the end of the new one.
             */
            perf_asm_fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, mode);
            if (perf_asm_fd < 0) {
               /* Never hand an invalid descriptor to raw_fd_ostream. */
               fclose(perf_map_file);
               perf_map_file = NULL;
            }
         }
      }
      first_time = FALSE;
   }

   if (perf_map_file) {
      const char *symbol = LLVMGetValueName(func);
      unsigned long addr = (uintptr_t)code;

      /* shouldClose = false: the descriptor is shared by all calls. */
      llvm::raw_fd_ostream Out(perf_asm_fd, false);
      Out << symbol << ":\n";
      unsigned long size = disassemble(code, Out);

      fprintf(perf_map_file, "%lx %lx %s\n", addr, size, symbol);
      fflush(perf_map_file);
   }
#else
   (void)func;
   (void)code;
#endif
}

View File

@ -83,7 +83,11 @@ lp_check_alignment(const void *ptr, unsigned alignment);
void
lp_disassemble(const void* func);
lp_disassemble(LLVMValueRef func, const void *code);
void
lp_profile(LLVMValueRef func, const void *code);
#ifdef __cplusplus

View File

@ -273,10 +273,6 @@ init_gallivm_engine(struct gallivm_state *gallivm)
LLVMDisposeMessage(error);
goto fail;
}
#if defined(DEBUG) || defined(PROFILE)
lp_register_oprofile_jit_event_listener(gallivm->engine);
#endif
}
LLVMAddModuleProvider(gallivm->engine, gallivm->provider);//new
@ -635,6 +631,7 @@ gallivm_compile_module(struct gallivm_state *gallivm)
}
func_pointer
gallivm_jit_function(struct gallivm_state *gallivm,
LLVMValueRef func)
@ -650,9 +647,13 @@ gallivm_jit_function(struct gallivm_state *gallivm,
jit_func = pointer_to_func(code);
if (gallivm_debug & GALLIVM_DEBUG_ASM) {
lp_disassemble(code);
lp_disassemble(func, code);
}
#if defined(PROFILE)
lp_profile(func, code);
#endif
/* Free the function body to save memory */
lp_func_delete_body(func);

View File

@ -54,7 +54,6 @@
#include <llvm-c/ExecutionEngine.h>
#include <llvm/Target/TargetOptions.h>
#include <llvm/ExecutionEngine/ExecutionEngine.h>
#include <llvm/ExecutionEngine/JITEventListener.h>
#if HAVE_LLVM >= 0x0301
#include <llvm/ADT/Triple.h>
#include <llvm/ExecutionEngine/JITMemoryManager.h>
@ -75,28 +74,6 @@
#include "lp_bld_misc.h"
/**
* Register the engine with oprofile.
*
* This allows to see the LLVM IR function names in oprofile output.
*
* To actually work LLVM needs to be built with the --with-oprofile configure
* option.
*
* Also a oprofile:oprofile user:group is necessary. Which is not created by
* default on some distributions.
*/
/* Attach an OProfile JIT event listener to the execution engine EE. */
extern "C" void
lp_register_oprofile_jit_event_listener(LLVMExecutionEngineRef EE)
{
#if HAVE_LLVM >= 0x0301
/* LLVM 3.1 and later: the factory is a static member of JITEventListener. */
llvm::unwrap(EE)->RegisterJITEventListener(llvm::JITEventListener::createOProfileJITEventListener());
#else
/* Older LLVM: the factory is a free function in namespace llvm. */
llvm::unwrap(EE)->RegisterJITEventListener(llvm::createOProfileJITEventListener());
#endif
}
extern "C" void
lp_set_target_options(void)
{

View File

@ -40,9 +40,6 @@ extern "C" {
extern void
lp_register_oprofile_jit_event_listener(LLVMExecutionEngineRef EE);
extern void
lp_set_target_options(void);