citrun

watch C/C++ source code execute
Log | Files | Refs | LICENSE

commit 44b00653e7bb3414c5327dab39325bc3d3a08019
parent f5a4740d11850ad1d07433b18110bd4682cfb2bf
Author: Kyle Milz <kyle@getaddrinfo.net>
Date:   Sun, 12 Jun 2016 18:33:12 -0600

src: modify linking yet again

- at link time, now generate a .c file that contains an array of pointers to nodes
- the idea is that, because we know the link command line, we can add the nodes
  we know were instrumented to this patch file
- then the runtime has an easy job of reading the counter information

Diffstat:
MSCV/Project.pm | 4++--
Mlib/runtime.c | 97++++++++++++++++++++++++++++++-------------------------------------------------
Mlib/runtime.h | 4+---
Msrc/inst_action.cc | 44+++++++++++---------------------------------
Msrc/inst_ast_visitor.cc | 17-----------------
Msrc/inst_main.cc | 98++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------
Msrc/runtime_h.h | 4+---
Mt/fibonacci.t | 2+-
Mt/for.t | 2+-
Mt/hello_world.t | 2+-
Mt/if.t | 2+-
Mt/inst_preamble.t | 8++------
Mt/return.t | 2+-
Mt/runtime_sanity.t | 12++++++------
Mt/switch.t | 2+-
Mt/while.t | 2+-
16 files changed, 127 insertions(+), 175 deletions(-)

diff --git a/SCV/Project.pm b/SCV/Project.pm @@ -69,7 +69,7 @@ sub instrumented_src { open( my $inst_fh, "<", "$self->{tmp_dir}/source_0.c" ); # Knock off the instrumentation preamble - my $line = <$inst_fh> for (1..26); + my $line = <$inst_fh> for (1..22); my $inst_src; while (my $line = <$inst_fh>) { @@ -85,7 +85,7 @@ sub inst_src_preamble { open( my $inst_fh, "<", "$self->{tmp_dir}/source_0.c" ); my $preamble; - for (1..26) { + for (1..22) { my $line = <$inst_fh>; $preamble .= $line; } diff --git a/lib/runtime.c b/lib/runtime.c @@ -1,21 +1,22 @@ #include <assert.h> -#include <err.h> -#include <limits.h> // PATH_MAX -#include <pthread.h> -#include <stdlib.h> // getenv +#include <err.h> /* err, errx, warnx */ +#include <limits.h> /* PATH_MAX */ +#include <pthread.h> /* pthread_create */ +#include <stdlib.h> /* getenv */ #include <string.h> #include <sys/socket.h> #include <sys/un.h> #if __APPLE__ -#include <sys/types.h> // read -#include <sys/uio.h> // read +#include <sys/types.h> /* read */ +#include <sys/uio.h> /* read */ #endif -#include <unistd.h> // read, getpid, getppid, getpgrp +#include <unistd.h> /* read, getpid, getppid, getpgrp */ #include "runtime.h" -/* Entry point into instrumented application */ -extern struct _citrun_node _citrun_tu_head; +/* Entrance into instrumented application. */ +extern struct citrun_node *citrun_nodes[]; +extern uint64_t citrun_nodes_total; void send_metadata(int); void send_execution_data(int); @@ -23,19 +24,8 @@ void send_execution_data(int); int xread(int d, const void *buf, size_t bytes_total); int xwrite(int d, const void *buf, size_t bytes_total); -/* - * Dummy function to make sure that the instrumented program gets linked against - * this library. - * Linux likes to liberally discard -l... flags given when linking. - */ -void -libscv_init() -{ -} -/* - * Sets up connection to the server socket and drops into an io loop. - */ +/* Sets up connection to the server socket and drops into an io loop. 
*/ void * control_thread(void *arg) { @@ -75,55 +65,43 @@ control_thread(void *arg) } } -/* - * Walks the translation unit list and writes all of the static information - * contained in the nodes. - */ +/* Walk the node array and send all of the static metadata information. */ void send_metadata(int fd) { - struct _citrun_node walk = _citrun_tu_head; - pid_t process_id, parent_process_id, process_group; - uint64_t num_tus = 0; + struct citrun_node walk; + pid_t pids[3]; size_t file_name_sz; + int i; - /* Send the total number of translation unit records we'll send later */ - while (1) { - ++num_tus; + /* Send the total number of instrumented nodes. */ + xwrite(fd, &citrun_nodes_total, sizeof(citrun_nodes_total)); - if (walk.next == NULL) - break; - walk = *walk.next; - } - xwrite(fd, &num_tus, sizeof(num_tus)); - - /* Send process id, parent process id and group process id. */ - process_id = getpid(); - parent_process_id = getppid(); - process_group = getpgrp(); + /* Send process id, parent process id, group process id. */ + pids[0] = getpid(); + pids[1] = getppid(); + pids[2] = getpgrp(); assert(sizeof(pid_t) == 4); - xwrite(fd, &process_id, sizeof(pid_t)); - xwrite(fd, &parent_process_id, sizeof(pid_t)); - xwrite(fd, &process_group, sizeof(pid_t)); + for (i = 0; i < (sizeof(pids) / sizeof(pids[0])); i++) + xwrite(fd, &pids[i], sizeof(pid_t)); - walk = _citrun_tu_head; - /* Send translation unit records */ - while (1) { - /* Send file name size and then the file name itself. */ + /* Send instrumented object file information, consisting of: */ + for (i = 0; i < citrun_nodes_total; i++) { + walk = *citrun_nodes[i]; + + /* Length of the original source file name. */ file_name_sz = strnlen(walk.file_name, PATH_MAX); xwrite(fd, &file_name_sz, sizeof(file_name_sz)); + + /* The original source file name. */ xwrite(fd, walk.file_name, file_name_sz); - /* Send the size of the execution buffers */ + /* Size of the execution counters. 
*/ xwrite(fd, &walk.size, sizeof(walk.size)); - /* Send the total number of instrumentation sites */ + /* Number of instrumentation sites. */ xwrite(fd, &walk.inst_sites, sizeof(walk.size)); - - if (walk.next == NULL) - break; - walk = *walk.next; } } @@ -134,15 +112,14 @@ send_metadata(int fd) void send_execution_data(int fd) { - struct _citrun_node walk = _citrun_tu_head; + struct citrun_node walk; + int i; + + for (i = 0; i < citrun_nodes_total; i++) { + walk = *citrun_nodes[i]; - while (1) { /* Write execution buffer, one 8 byte counter per source line */ xwrite(fd, walk.lines_ptr, walk.size * sizeof(uint64_t)); - - if (walk.next == NULL) - break; - walk = *walk.next; } } diff --git a/lib/runtime.h b/lib/runtime.h @@ -1,9 +1,7 @@ #include <stdint.h> -struct _citrun_node { +struct citrun_node { uint64_t *lines_ptr; uint32_t size; uint32_t inst_sites; const char *file_name; - struct _citrun_node *next; }; -void libscv_init(); diff --git a/src/inst_action.cc b/src/inst_action.cc @@ -41,37 +41,24 @@ get_current_node(std::string file_path) size_t last_slash = file_path.find_last_of('/'); std::string fn(file_path.substr(last_slash + 1)); - std::replace(fn.begin(), fn.end(), '.', '_'); - std::replace(fn.begin(), fn.end(), '-', '_'); + size_t period = fn.find_first_of('.'); - return fn; + return fn.substr(0, period); } -std::string -swap_last_node(std::string curr_node) +void +append_curr_node(std::string curr_node) { char *cwd = getcwd(NULL, PATH_MAX); if (cwd == NULL) errx(1, "getcwd"); - std::string src_number_filename(cwd); - src_number_filename.append("/LAST_NODE"); - - std::string last_node("NULL"); - - if (access(src_number_filename.c_str(), F_OK) == 0) { - // LAST_NODE exists, read last_node from file - std::ifstream src_number_file(src_number_filename); - src_number_file >> last_node; - src_number_file.close(); - } + std::string inst_filename(cwd); + inst_filename.append("/INSTRUMENTED"); - // Always write curr_node to file - std::ofstream 
src_number_file(src_number_filename); - src_number_file << curr_node; - src_number_file.close(); - - return last_node; + // Append current primary source file to INSTRUMENTED list. + std::ofstream inst_ofstream(inst_filename, std::ofstream::app); + inst_ofstream << curr_node << std::endl; } void @@ -89,9 +76,7 @@ InstrumentAction::EndSourceFileAction() std::string file_name = getCurrentFile(); std::string curr_node = get_current_node(file_name); - std::string last_node = swap_last_node(curr_node); - - //std::cerr << "LAST NODE = " << last_node << std::endl; + append_curr_node(curr_node); std::stringstream ss; // Add preprocessor stuff so that the C runtime library links against @@ -109,19 +94,12 @@ InstrumentAction::EndSourceFileAction() // Get visitor instance to check how many times it rewrote something RewriteASTVisitor visitor = InstrumentASTConsumer->get_visitor(); - // Let the struct know this definition will be elsewhere - ss << "extern struct _citrun_node _citrun_node_" << last_node << ";" << std::endl; - // Define this translation units main book keeping data structure - ss << "struct _citrun_node _citrun_node_" << curr_node << " = {" << std::endl + ss << "struct citrun_node citrun_node_" << curr_node << " = {" << std::endl << " .lines_ptr = _citrun_lines," << std::endl << " .size = " << num_lines << "," << std::endl << " .inst_sites = " << visitor.GetRewriteCount() << "," << std::endl << " .file_name = \"" << file_name << "\"," << std::endl; - if (last_node.compare("NULL") == 0) - ss << " .next = NULL," << std::endl; - else - ss << " .next = &_citrun_node_" << last_node << "," << std::endl; ss << "};" << std::endl; // Close extern "C" { diff --git a/src/inst_ast_visitor.cc b/src/inst_ast_visitor.cc @@ -66,23 +66,6 @@ RewriteASTVisitor::VisitFunctionDecl(clang::FunctionDecl *f) if (f->hasBody() == 0) return true; - clang::Stmt *FuncBody = f->getBody(); - - clang::DeclarationName DeclName = f->getNameInfo().getName(); - std::string FuncName = 
DeclName.getAsString(); - - if (FuncName.compare("main") != 0) - // Function is not main - return true; - - std::stringstream ss; - // On some platforms we need to depend directly on a symbol provided by - // the runtime. Normally this isn't needed because the runtime only - // depends on symbols in the isntrumented application. - ss << "libscv_init();"; - clang::SourceLocation curly_brace(FuncBody->getLocStart().getLocWithOffset(1)); - TheRewriter.InsertTextBefore(curly_brace, ss.str()); - return true; } diff --git a/src/inst_main.cc b/src/inst_main.cc @@ -21,6 +21,7 @@ #include <clang/Tooling/Tooling.h> #include "inst_action.h" +#include "runtime_h.h" #define STR_EXPAND(tok) #tok #define STR(tok) STR_EXPAND(tok) @@ -146,6 +147,64 @@ restore_original_src(std::map<std::string, std::string> const &temp_file_map) } } +void +patch_link_command(std::vector<char *> &args) +{ + std::string inst_files_list("INSTRUMENTED"); + + if (access(inst_files_list.c_str(), F_OK)) { + warnx("No instrumented object files found."); + if (execvp(args[0], &args[0])) + err(1, "execvp"); + } + + // std::cerr << "Link detected. Arguments are:" << std::endl; + // for (auto &arg : args) + // std::cerr << " '" << arg << "', " << std::endl; + + std::vector<std::string> instrumented_files; + std::ifstream inst_files_ifstream(inst_files_list); + + std::string temp_line; + while (std::getline(inst_files_ifstream, temp_line)) + instrumented_files.push_back(temp_line); + + inst_files_ifstream.close(); + + // std::cerr << "Instrumented object files are:" << std::endl; + // for (auto &line : instrumented_files) + // std::cerr << " '" << line << "', " << std::endl; + + std::ofstream patch_ofstream("citrun_patch.c"); + + // Inject the runtime header. 
+ patch_ofstream << runtime_h << std::endl; + + for (auto &line : instrumented_files) + patch_ofstream << "extern struct citrun_node citrun_node_" << line << ";" << std::endl; + + int num_tus = instrumented_files.size(); + patch_ofstream << "struct citrun_node *citrun_nodes["; + patch_ofstream << num_tus << "] = {" << std::endl; + + for (auto &line : instrumented_files) + patch_ofstream << "\t&citrun_node_" << line << ", " << std::endl; + patch_ofstream << "};" << std::endl; + + patch_ofstream << "uint64_t citrun_nodes_total = " << num_tus << ";" << std::endl; + patch_ofstream.close(); + + args.push_back(const_cast<char *>("citrun_patch.c")); + + char *lib_str; + if ((lib_str = getenv("CITRUN_LIB")) == NULL) + errx(1, "CITRUN_LIB not found in environment."); + + // Add the runtime library and the symbol define hack + // automatically to the command line + args.push_back(lib_str); +} + int main(int argc, char *argv[]) { @@ -214,42 +273,8 @@ main(int argc, char *argv[]) // gcc -o main main.c fib.c linking = true; - std::string last_node_path("LAST_NODE"); if (linking) { - if (access(last_node_path.c_str(), F_OK)) { - // Couldn't access the LAST_NODE file, we cannot link - // to the runtime library without it. - warnx("LAST_NODE file not found."); - if (execvp(argv[0], argv)) - err(1, "execvp"); - } - - std::ifstream last_node_ifstream(last_node_path); - std::string last_node; - - last_node_ifstream >> last_node; - last_node_ifstream.close(); - - // We need to link the entry point in the runtime to the - // instrumented application. OS independent. 
- std::stringstream defsym_arg; -#ifdef __APPLE__ - defsym_arg << "-Wl,-alias,__citrun_node_"; - defsym_arg << last_node; - defsym_arg << ",__citrun_tu_head"; -#else - defsym_arg << "-Wl,--defsym=_citrun_tu_head=_citrun_node_"; - defsym_arg << last_node; -#endif - - char *lib_str; - if ((lib_str = getenv("CITRUN_LIB")) == NULL) - errx(1, "CITRUN_LIB not found in environment."); - - // Add the runtime library and the symbol define hack - // automatically to the command line - args.push_back(strdup(defsym_arg.str().c_str())); - args.push_back(lib_str); + patch_link_command(args); } // Instrumentation succeeded. Run the native compiler with a possibly @@ -271,7 +296,4 @@ main(int argc, char *argv[]) err(1, "waitpid"); restore_original_src(temp_file_map); - - if (linking) - unlink(last_node_path.c_str()); } diff --git a/src/runtime_h.h b/src/runtime_h.h @@ -1,12 +1,10 @@ static const char runtime_h[] = "#include <stdint.h>\n" "#include <stddef.h>\n" -"struct _citrun_node {\n" +"struct citrun_node {\n" " uint64_t *lines_ptr;\n" " uint32_t size;\n" " uint32_t inst_sites;\n" " const char *file_name;\n" -" struct _citrun_node *next;\n" "};\n" -"void libscv_init();\n" ; diff --git a/t/fibonacci.t b/t/fibonacci.t @@ -60,7 +60,7 @@ fibonacci(long long n) int main(int argc, char *argv[]) -{libscv_init(); +{ long long n; if ((++_citrun_lines[20], argc != 2)) { diff --git a/t/for.t b/t/for.t @@ -27,7 +27,7 @@ $project->compile(); my $inst_src_good = <<EOF; int main(void) -{libscv_init(); +{ int i; for (i = 0; (++_citrun_lines[6], i < 19); i++) { diff --git a/t/hello_world.t b/t/hello_world.t @@ -26,7 +26,7 @@ my $inst_src_good = <<EOF; int main(void) -{libscv_init(); +{ (++_citrun_lines[6], printf("hello, world!")); return (++_citrun_lines[7], 0); } diff --git a/t/if.t b/t/if.t @@ -36,7 +36,7 @@ my $inst_src_good = <<EOF; int main(int argc, char *argv[]) -{libscv_init(); +{ if ((++_citrun_lines[6], argc == 1)) return (++_citrun_lines[7], 1); else diff --git a/t/inst_preamble.t 
b/t/inst_preamble.t @@ -27,23 +27,19 @@ extern "C" { #endif #include <stdint.h> #include <stddef.h> -struct _citrun_node { +struct citrun_node { uint64_t *lines_ptr; uint32_t size; uint32_t inst_sites; const char *file_name; - struct _citrun_node *next; }; -void libscv_init(); static uint64_t _citrun_lines[6]; -extern struct _citrun_node _citrun_node_NULL; -struct _citrun_node _citrun_node_source_0_c = { +struct citrun_node citrun_node_source_0 = { .lines_ptr = _citrun_lines, .size = 6, .inst_sites = 1, .file_name = "$tmp_dir/source_0.c", - .next = NULL, }; #ifdef __cplusplus } diff --git a/t/return.t b/t/return.t @@ -29,7 +29,7 @@ int foo() { return (++_citrun_lines[2], 0); } -int main(void) {libscv_init(); +int main(void) { return (++_citrun_lines[6], 10); return (++_citrun_lines[8], 10 + 10); diff --git a/t/runtime_sanity.t b/t/runtime_sanity.t @@ -65,22 +65,22 @@ my $runtime_metadata = $viewer->get_metadata(); my $tus = $runtime_metadata->{tus}; my ($source_0, $source_1, $source_2) = @$tus; -like( $source_0->{filename}, qr/.*source_2.c/, "runtime filename check 0" ); -is( $source_0->{lines}, 9, "runtime line count check 0" ); +like( $source_0->{filename}, qr/.*source_0.c/, "runtime filename check 0" ); +is( $source_0->{lines}, 20, "runtime line count check 0" ); #is( $source_0->{inst_sites}, 7, "instrumented site count 0" ); like( $source_1->{filename}, qr/.*source_1.c/, "runtime filename check 1" ); is( $source_1->{lines}, 11, "runtime line count check 1" ); #is( $source_1->{inst_sites}, 7, "instrumented site count 1" ); -like( $source_2->{filename}, qr/.*source_0.c/, "runtime filename check 2" ); -is( $source_2->{lines}, 20, "runtime line count check 2" ); +like( $source_2->{filename}, qr/.*source_2.c/, "runtime filename check 2" ); +is( $source_2->{lines}, 9, "runtime line count check 2" ); #is( $source_2->{inst_sites}, 6, "instrumented site count 2" ); # Request and check execution data my $data = $viewer->get_execution_data($tus); -my @lines = @{ 
$data->[2] }; +my @lines = @{ $data->[0] }; is ( $lines[$_], 0, "src 0 line $_ check" ) for (1..11); is ( $lines[12], 1, "src 0 line 14 check" ); is ( $lines[$_], 0, "src 0 line $_ check" ) for (13..14); @@ -94,7 +94,7 @@ is ( $lines[$_], 0, "src 1 line $_ check" ) for (0..3); cmp_ok ( $lines[$_], ">", 10, "src 1 line $_ check" ) for (4..7); is ( $lines[8], 0, "src 1 line 8 check" ); -my @lines = @{ $data->[0] }; +my @lines = @{ $data->[2] }; is ( $lines[$_], 0, "src 2 line $_ check" ) for (0..8); $project->kill(); diff --git a/t/switch.t b/t/switch.t @@ -30,7 +30,7 @@ $project->compile(); my $inst_src_good = <<EOF; int main(void) -{libscv_init(); +{ int i; switch ((++_citrun_lines[6], i)) { diff --git a/t/while.t b/t/while.t @@ -28,7 +28,7 @@ $project->compile(); my $inst_src_good = <<EOF; int main(void) -{libscv_init(); +{ int i; i = 0;