citrun

watch C/C++ source code execute
Log | Files | Refs | LICENSE

commit 110729158859309c1081337066b58c6933b4245e
parent 6cbf5621f7aefc1ff76829f0ca66181a05ddfa06
Author: Kyle Milz <milz@imac.0x30.net>
Date:   Sat, 27 Feb 2021 20:14:26 -0800

inst: move citrun_inst to inst/ + add Makefile

Diffstat:
D bin/inst_action.cc | 147 -------------------------------------------------------------------------------
D bin/inst_action.h | 53 -----------------------------------------------------
D bin/inst_consumer.h | 27 ---------------------------
D bin/inst_fe.cc | 295 -------------------------------------------------------------------------------
D bin/inst_fe.h | 68 --------------------------------------------------------------------
D bin/inst_feunix.cc | 167 -------------------------------------------------------------------------------
D bin/inst_feunix.h | 18 ------------------
D bin/inst_main.cc | 70 ----------------------------------------------------------------------
D bin/inst_visitor.cc | 208 -------------------------------------------------------------------------------
A inst/Makefile | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
A inst/action.cc | 147 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A inst/action.h | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
A inst/consumer.h | 27 +++++++++++++++++++++++++++
A inst/fe.cc | 295 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A inst/fe.h | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A inst/fe_unix.cc | 167 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A inst/fe_unix.h | 18 ++++++++++++++++++
R bin/inst_fewin32.cc -> inst/fewin32.cc | 0
R bin/inst_fewin32.h -> inst/fewin32.h | 0
R bin/inst_log.h -> inst/log.h | 0
A inst/main.cc | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A inst/visitor.cc | 208 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
R bin/inst_visitor.h -> inst/visitor.h | 0
23 files changed, 1108 insertions(+), 1053 deletions(-)

diff --git a/bin/inst_action.cc b/bin/inst_action.cc @@ -1,147 +0,0 @@ -// -// Copyright (c) 2016 Kyle Milz <kyle@0x30.net> -// -// Permission to use, copy, modify, and distribute this software for any -// purpose with or without fee is hereby granted, provided that the above -// copyright notice and this permission notice appear in all copies. -// -// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF -// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -// -#include "inst_action.h" -#include "citrun_h.h" // citrun_h - -#include <clang/Frontend/CompilerInstance.h> -#include <fstream> -#include <sstream> -#include <string> - - -std::unique_ptr<clang::ASTConsumer> -InstrumentAction::CreateASTConsumer(clang::CompilerInstance &CI, clang::StringRef file) -{ - // llvm::errs() << "** Creating AST consumer for: " << file << "\n"; - clang::SourceManager &sm = CI.getSourceManager(); - m_TheRewriter.setSourceMgr(sm, CI.getLangOpts()); - - // Hang onto a reference to this so we can read from it later - m_InstrumentASTConsumer = new RewriteASTConsumer(m_TheRewriter); - return std::unique_ptr<clang::ASTConsumer>(m_InstrumentASTConsumer); -} - -void -InstrumentAction::write_modified_src(clang::FileID const &fid) -{ - std::string out_file(getCurrentFile()); - - std::error_code ec; - llvm::raw_fd_ostream output(out_file, ec, llvm::sys::fs::F_None); - if (ec.value()) { - m_log << "Error writing modified source '" << out_file - << "': " << ec.message() << std::endl; - return; - } - - // Write the instrumented source file - m_TheRewriter.getEditBuffer(fid).write(output); - m_log << 
"Modified source written successfully." << std::endl; -} - -void -InstrumentAction::EndSourceFileAction() -{ - clang::SourceManager &sm = m_TheRewriter.getSourceMgr(); - const clang::FileID main_fid = sm.getMainFileID(); - - clang::SourceLocation end = sm.getLocForEndOfFile(main_fid); - unsigned int num_lines = sm.getPresumedLineNumber(end); - - // - // Write instrumentation preamble. Includes: - // - runtime header - // - per tu citrun_node - // - static constructor for runtime initialization - // - std::ostringstream preamble; - preamble << -R"(#ifdef __cplusplus -extern "C" { -#endif -)"; - preamble << citrun_h; - preamble << "static struct citrun_node _citrun = {\n" - << " " << num_lines << ",\n" - << " \"" << m_compiler_file_name << "\",\n" - << " \"" << getCurrentFile().str() << "\",\n"; - preamble << "};\n"; - -#ifdef _WIN32 - // - // Cribbed from an answer by Joe: - // http://stackoverflow.com/questions/1113409/attribute-constructor-equivalent-in-vc - // - preamble << R"( -#pragma section(".CRT$XCU",read) -#define INITIALIZER2_(f,p) \ - static void f(void); \ - __declspec(allocate(".CRT$XCU")) void (*f##_)(void) = f; \ - __pragma(comment(linker,"/include:" p #f "_")) \ - static void f(void) -#define INITIALIZER(f) INITIALIZER2_(f,"_") -)"; - preamble << "INITIALIZER( init_" - << m_compiler_file_name.substr(0, m_compiler_file_name.find(".")) - << ")" - << R"( -{ - citrun_node_add(citrun_major, citrun_minor, &_citrun); -} -)"; -#else - preamble << R"( -__attribute__((constructor)) static void -citrun_constructor() -{ - citrun_node_add(citrun_major, citrun_minor, &_citrun); -} -)"; -#endif - - preamble << R"( -#ifdef __cplusplus -} -#endif -#line 1 -)"; - - clang::SourceLocation start = sm.getLocForStartOfFile(main_fid); - if (m_is_citruninst) { - std::ofstream preamble_file(getCurrentFile().str() + ".preamble"); - preamble_file << preamble.str(); - preamble_file.close(); - } else if (m_TheRewriter.InsertTextAfter(start, preamble.str())) { - m_log << "Failed 
to insert the instrumentation preabmle."; - return; - } - - m_log << "Instrumentation of '" << m_compiler_file_name << "' finished:" << std::endl; - m_log << " " << num_lines << " Lines of source code" << std::endl; - - // - // Write out statistics from the AST visitor. - // - RewriteASTVisitor v = m_InstrumentASTConsumer->get_visitor(); - for (int i = 0; i < NCOUNTERS; ++i) { - if (v.m_counters[i] == 0) - continue; - m_log << " " << v.m_counters[i] << " " - << v.m_counter_descr[i] << std::endl; - } - - write_modified_src(main_fid); -} diff --git a/bin/inst_action.h b/bin/inst_action.h @@ -1,53 +0,0 @@ -#include "inst_consumer.h" -#include "inst_log.h" - -#include <clang/Frontend/FrontendActions.h> -#include <clang/Rewrite/Core/Rewriter.h> -#include <clang/Tooling/Tooling.h> - - -// For each source file provided to the tool, a new FrontendAction is created. -class InstrumentAction : public clang::ASTFrontendAction -{ - void write_modified_src(clang::FileID const &); - - clang::Rewriter m_TheRewriter; - RewriteASTConsumer *m_InstrumentASTConsumer; - InstrumentLogger& m_log; - bool m_is_citruninst; - std::string m_compiler_file_name; - -public: - InstrumentAction(InstrumentLogger &log, bool citruninst, - std::string const &filename) : - m_log(log), - m_is_citruninst(citruninst), - m_compiler_file_name(filename) - {}; - - void EndSourceFileAction() override; - std::unique_ptr<clang::ASTConsumer> CreateASTConsumer(clang::CompilerInstance &, clang::StringRef) override; -}; - -// -// Needed because we pass custom stuff down into the ASTFrontendAction -// -class InstrumentActionFactory : public clang::tooling::FrontendActionFactory -{ - InstrumentLogger& m_log; - bool m_is_citruninst; - std::vector<std::string> m_source_files; - int m_i; - -public: - InstrumentActionFactory(InstrumentLogger &log, bool citruninst, std::vector<std::string> const &src_files) : - m_log(log), - m_is_citruninst(citruninst), - m_source_files(src_files), - m_i(0) - {}; - - 
clang::ASTFrontendAction *create() { - return new InstrumentAction(m_log, m_is_citruninst, m_source_files[m_i++]); - } -}; diff --git a/bin/inst_consumer.h b/bin/inst_consumer.h @@ -1,27 +0,0 @@ -#include "inst_visitor.h" - -#include <clang/AST/ASTConsumer.h> -#include <clang/Rewrite/Core/Rewriter.h> - - -class RewriteASTConsumer : public clang::ASTConsumer -{ - RewriteASTVisitor Visitor; - -public: - explicit RewriteASTConsumer(clang::Rewriter &R) : - Visitor(R) {} - - // Override the method that gets called for each parsed top-level - // declaration. - virtual bool HandleTopLevelDecl(clang::DeclGroupRef DR) { - for (auto &b : DR) { - // Traverse the declaration using our AST visitor. - Visitor.TraverseDecl(b); - // b->dump(); - } - return true; - } - - RewriteASTVisitor &get_visitor() { return Visitor; }; -}; diff --git a/bin/inst_fe.cc b/bin/inst_fe.cc @@ -1,295 +0,0 @@ -// -// Copyright (c) 2016 Kyle Milz <kyle@0x30.net> -// -// Permission to use, copy, modify, and distribute this software for any -// purpose with or without fee is hereby granted, provided that the above -// copyright notice and this permission notice appear in all copies. -// -// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF -// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
-// -#include "inst_action.h" // InstrumentActionFactory -#include "inst_fe.h" -#include "citrun.h" // citrun_major, citrun_minor - -#include <clang/Basic/Diagnostic.h> // IgnoringDiagConsumer -#include <clang/Tooling/CommonOptionsParser.h> -#include <clang/Tooling/Tooling.h> -#include <llvm/Support/raw_os_ostream.h> - -#include <algorithm> // std::find_if -#include <cstdio> // tmpnam -#include <cstring> // strcmp -#include <iostream> // std::cerr -#include <sstream> // std::ostringstream - - -static llvm::cl::OptionCategory ToolingCategory("citrun_inst options"); - -InstFrontend::InstFrontend(int argc, char *argv[], bool is_citrun_inst) : - m_start_time(std::chrono::high_resolution_clock::now()), - m_args(argv, argv + argc), - m_is_citruninst(is_citrun_inst), - m_log(is_citrun_inst) -{ -} - -InstFrontend::~InstFrontend() -{ -} - -void -InstFrontend::log_identity() -{ - m_log << ">> citrun_inst v" << citrun_major << "." << citrun_minor; - log_os_str(); - m_log << " called as " << m_args[0] << std::endl; -} - -void -InstFrontend::get_paths() -{ - m_compilers_path = PREFIX ; - m_compilers_path += dir_sep() ; - m_compilers_path += "share" ; - m_compilers_path += dir_sep() ; - m_compilers_path += "citrun" ; - - m_lib_path = PREFIX ; - m_lib_path += dir_sep(); - m_lib_path += lib_name(); - - m_log << "Compilers path = '" << m_compilers_path << "'" << std::endl; -} - -// -// Tries to remove m_compilers_path from PATH otherwise it exits easily. -// -void -InstFrontend::clean_PATH() -{ - if (m_is_citruninst == true) - return; - - char *path; - if ((path = std::getenv("PATH")) == NULL) { - std::cerr << "Error: PATH is not set." << std::endl; - m_log << "Error: PATH is not set." 
<< std::endl; - exit(1); - } - - m_log << "PATH = '" << path << "'" << std::endl; - - // Filter m_compilers_path out of PATH - std::stringstream path_ss(path); - std::string component; - bool first_component = true; - bool found_citrun_path = false; - std::ostringstream new_path; - - while (std::getline(path_ss, component, path_sep())) { - if (component == m_compilers_path) { - found_citrun_path = true; - continue; - } - - if (first_component == false) - new_path << path_sep(); - - // It wasn't m_compilers_path, keep it - new_path << component; - first_component = false; - } - - if (!found_citrun_path) { - // - // This is a really bad situation to be in. We are currently - // executing and can't tell which PATH element we were called - // from. If we exec there's a chance we'll get stuck in an - // infinite exec loop. - // - // Error visibly so this can be fixed as soon as possible. - // - std::stringstream err; - err << "Error: '" << m_compilers_path << "' not in PATH."; - - std::cerr << err.str() << std::endl; - m_log << err.str() << std::endl; - exit(1); - } - - set_path(new_path.str()); -} - -// -// Guess if the argument is a source file. If it is stash a backup of the file -// and sync the timestamps. -// -void -InstFrontend::save_if_srcfile(char *arg) -{ - std::array<std::string, 4> exts = {{ ".c", ".cc", ".cxx", ".cpp" }}; - if (std::find_if(exts.begin(), exts.end(), ends_with(arg)) == exts.end()) - return; - - char *dst_fn; - if ((dst_fn = std::tmpnam(NULL)) == NULL) { - m_log << "tmpnam failed." << std::endl; - return; - } - - m_source_files.push_back(arg); - m_log << "Found source file '" << arg << "'" << std::endl; - - if (m_is_citruninst) - // In this mode the modified source file is written to a - // completely different file. - return; - - copy_file(dst_fn, arg); - m_temp_file_map[arg] = dst_fn; -} - -// -// Walks the entire command line taking action on important arguments. 
-// -void -InstFrontend::process_cmdline() -{ - bool object_arg = false; - bool compile_arg = false; - - // - // Walk every argument one by one looking for preprocessor switches, - // compile mode flags and source files. - // - for (auto &arg : m_args) { - if (std::strcmp(arg, "-E") == 0 || std::strcmp(arg, "-MM") == 0) { - // I don't know the repercussions of doing otherwise. - m_log << "Preprocessor argument " << arg << " found" - << std::endl; - exec_compiler(); - } - else if (std::strcmp(arg, "-o") == 0) - object_arg = true; - else if (std::strcmp(arg, "-c") == 0) - compile_arg = true; -#ifdef _WIN32 - else if (std::strcmp(arg, "/c") == 0) - compile_arg = true; -#endif // _WIN32 - - save_if_srcfile(arg); - } - - if (is_link(object_arg, compile_arg)) { - m_log << "Link detected, adding '"<< m_lib_path - << "' to command line." << std::endl; - m_args.push_back(const_cast<char *>(m_lib_path.c_str())); - } - - m_log << "Command line is '" << m_args[0]; - for (unsigned int i = 1; i < m_args.size(); ++i) - m_log << " " << m_args[i]; - m_log << "'" << std::endl; - - if (m_source_files.size() != 0) - return; - - m_log << "No source files found on command line." << std::endl; - exec_compiler(); -} - -// -// Creates and executes InstrumentAction objects for detected source files. -// -void -InstFrontend::instrument() -{ - // - // Create a special command line for ClangTool that looks like: - // clang++ src1.c src2.c -- clang++ -I. 
-Isrc -c src1.c src2.c - // - std::vector<const char *> clang_argv; - - clang_argv.push_back(m_args[0]); - for (auto &s : m_source_files) - clang_argv.push_back(s.c_str()); - clang_argv.push_back("--"); - clang_argv.insert(clang_argv.end(), m_args.begin(), m_args.end()); -#if defined(__OpenBSD__) - clang_argv.push_back("-I/usr/local/lib/clang/3.8.0/include"); - m_log << "Added clangtool argument '" << clang_argv.back() << "'" << std::endl; -#elif defined(__APPLE__) - clang_argv.push_back("-I/opt/local/libexec/llvm-3.8/lib/clang/3.8.1/include"); - m_log << "Added clangtool argument '" << clang_argv.back() << "'" << std::endl; -#elif defined(WIN32) - clang_argv.push_back(R"(-IC:\Clang\lib\clang\3.9.1\include)"); - m_log << "Added clangtool argument '" << clang_argv.back() << "'" << std::endl; -#endif - - int clang_argc = clang_argv.size(); - clang::tooling::CommonOptionsParser - op(clang_argc, &clang_argv[0], ToolingCategory); - clang::tooling::ClangTool - Tool(op.getCompilations(), op.getSourcePathList()); - - // - // Ignore all errors/warnings by default. - // This makes Tool.run() always return 0 too. - // - Tool.setDiagnosticConsumer(new clang::IgnoringDiagConsumer()); - - std::unique_ptr<InstrumentActionFactory> f = - llvm::make_unique<InstrumentActionFactory>(m_log, m_is_citruninst, m_source_files); - - // - // Run instrumentation. All source files are processed here. - // - Tool.run(f.get()); - - // All of the time until now is the overhead citrun_inst adds. - std::chrono::high_resolution_clock::time_point now = - std::chrono::high_resolution_clock::now(); - m_log << std::chrono::duration_cast<std::chrono::milliseconds>(now - m_start_time).count() - << " Milliseconds spent rewriting source." << std::endl; - - // This is as far as we go in citrun_inst mode. - if (m_is_citruninst) - exit(0); -} - -// -// Restore source files from stashed backups and sync timestamps. 
-// -void -InstFrontend::restore_original_src() -{ - for (auto &tmp_file : m_temp_file_map) { - m_log << "Restored '" << tmp_file.first << "'" << std::endl; - - copy_file(tmp_file.first, tmp_file.second); - unlink(tmp_file.second.c_str()); - } -} - -void -InstFrontend::compile_instrumented() -{ - int ret; - - ret = fork_compiler(); - m_log << "Rewritten source compile " << (ret ? "failed" : "successful") - << std::endl; - - restore_original_src(); - - if (ret) - // Rewritten compile failed. Run again without modified src. - exec_compiler(); -} diff --git a/bin/inst_fe.h b/bin/inst_fe.h @@ -1,68 +0,0 @@ -// -// Instrument Frontend. -// Takes command lines and instruments source code. -// -#include "inst_log.h" - -#include <chrono> // std::chrono::high_resolution_clock -#include <map> // std::map -#include <string> // std::string - -class InstFrontend -{ - void save_if_srcfile(char *); - void restore_original_src(); - - std::string m_compilers_path; - std::string m_lib_path; - std::chrono::high_resolution_clock::time_point m_start_time; - std::map<std::string, std::string> m_temp_file_map; - - // Implemented by operating system specific classes. - virtual void log_os_str() = 0; - virtual char dir_sep() = 0; - virtual char path_sep() = 0; - virtual std::string lib_name() = 0; - virtual void set_path(std::string const &) = 0; - virtual bool is_link(bool, bool) = 0; - virtual void copy_file(std::string const &, std::string const &) = 0; - virtual void exec_compiler() = 0; - virtual int fork_compiler() = 0; - -protected: - std::vector<char *> m_args; - bool m_is_citruninst; - std::vector<std::string> m_source_files; - InstrumentLogger m_log; - -public: - InstFrontend(int, char *argv[], bool); - virtual ~InstFrontend() = 0; - - void log_identity(); - void get_paths(); - void clean_PATH(); - void process_cmdline(); - void instrument(); - void compile_instrumented(); -}; - -// -// Helper class that is a unary predicate suitable for use with std::find_if. 
-// -class ends_with -{ - std::string arg; -public: - ends_with(char *argument) : - arg(argument) - {} - - bool operator ()(std::string const &suffix) const - { - if (suffix.length() > arg.length()) - return false; - - return std::equal(suffix.rbegin(), suffix.rend(), arg.rbegin()); - } -}; diff --git a/bin/inst_feunix.cc b/bin/inst_feunix.cc @@ -1,167 +0,0 @@ -// -// Copyright (c) 2016 Kyle Milz <kyle@0x30.net> -// -// Permission to use, copy, modify, and distribute this software for any -// purpose with or without fee is hereby granted, provided that the above -// copyright notice and this permission notice appear in all copies. -// -// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF -// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -// -#include "inst_feunix.h" - -#include <sys/stat.h> // stat -#include <sys/time.h> // utimes -#include <sys/utsname.h> // uname -#include <sys/wait.h> // waitpid - -#include <err.h> -#include <fstream> // ifstream, ofstream -#include <unistd.h> // execvp, fork, getpid, unlink - - -char -InstFrontendUnix::dir_sep() -{ - return '/'; -} - -char -InstFrontendUnix::path_sep() -{ - return ':'; -} - -std::string -InstFrontendUnix::lib_name() -{ - return "lib/libcitrun.a"; -} - -void -InstFrontendUnix::log_os_str() -{ - struct utsname utsname; - - if (uname(&utsname) == -1) - m_log << " (Unknown OS)"; - else - m_log << " (" << utsname.sysname << "-" << utsname.release - << " " << utsname.machine << ")"; - - // Sometimes we're not called as citrun_inst so force that here. 
- setprogname("citrun_inst"); -} - -void -InstFrontendUnix::set_path(std::string const &new_path) -{ - if (setenv("PATH", new_path.c_str(), 1)) - err(1, "setenv"); -} - -// -// Copies one file to another preserving timestamps. -// -void -InstFrontendUnix::copy_file(std::string const &dst_fn, std::string const &src_fn) -{ - struct stat sb; - struct timeval st_tim[2]; - - // Save original access and modification times - if (stat(src_fn.c_str(), &sb) < 0) - err(1, "stat"); -#ifdef __APPLE__ - TIMESPEC_TO_TIMEVAL(&st_tim[0], &sb.st_atimespec); - TIMESPEC_TO_TIMEVAL(&st_tim[1], &sb.st_mtimespec); -#else - TIMESPEC_TO_TIMEVAL(&st_tim[0], &sb.st_atim); - TIMESPEC_TO_TIMEVAL(&st_tim[1], &sb.st_mtim); -#endif - - std::ifstream src(src_fn, std::ios::binary); - std::ofstream dst(dst_fn, std::ios::binary); - - dst << src.rdbuf(); - - src.close(); - dst.close(); - - // - // Restore the original access and modification time, it's not critical - // if it fails. - // - if (utimes(dst_fn.c_str(), st_tim) < 0) - warn("utimes"); -} - -bool -InstFrontendUnix::is_link(bool object_arg, bool compile_arg) -{ - if (!object_arg && !compile_arg && m_source_files.size() > 0) - // Assume single line a.out compilation - // $ gcc main.c - return true; - else if (object_arg && !compile_arg) - // gcc -o main main.o fib.o while.o - // gcc -o main main.c fib.c - return true; - - return false; -} - -// -// Execute the compiler by calling execvp(3) on the m_args vector. -// -void -InstFrontendUnix::exec_compiler() -{ - if (m_is_citruninst) { - m_log << "Running as citrun_inst, not calling exec()" << std::endl; - exit(0); - } - - // Null termination explicitly mentioned in execvp(3). - m_args.push_back(NULL); - if (execvp(m_args[0], &m_args[0])) - err(1, "execvp"); -} - -// -// fork(2) then execute the compiler and wait for it to finish. Returns exit -// code of native compiler. 
-// -int -InstFrontendUnix::fork_compiler() -{ - pid_t child_pid; - int status; - int exit = -1; - - if ((child_pid = fork()) < 0) - err(1, "fork"); - - // If in child execute compiler. - if (child_pid == 0) - exec_compiler(); - - m_log << "Forked compiler '" << m_args[0] << "' " - << "pid is '" << child_pid << "'" << std::endl; - - // Wait for the child to finish so we can get its exit code. - if (waitpid(child_pid, &status, 0) < 0) - err(1, "waitpid"); - - // Decode the exit code from status. - if (WIFEXITED(status)) - exit = WEXITSTATUS(status); - - // Return the exit code of the native compiler. - return exit; -} diff --git a/bin/inst_feunix.h b/bin/inst_feunix.h @@ -1,18 +0,0 @@ -#include "inst_fe.h" - -class InstFrontendUnix : public InstFrontend -{ - // Use InstFrontend's constructor - using InstFrontend::InstFrontend; - - // Mandatory interface implementation. - char dir_sep(); - char path_sep(); - std::string lib_name(); - void log_os_str(); - void set_path(std::string const &); - bool is_link(bool, bool); - void copy_file(std::string const &, std::string const &); - void exec_compiler(); - int fork_compiler(); -}; diff --git a/bin/inst_main.cc b/bin/inst_main.cc @@ -1,70 +0,0 @@ -// -// Copyright (c) 2016 Kyle Milz <kyle@0x30.net> -// -// Permission to use, copy, modify, and distribute this software for any -// purpose with or without fee is hereby granted, provided that the above -// copyright notice and this permission notice appear in all copies. -// -// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -// MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF -// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -// -#ifdef _WIN32 -#include "inst_fewin32.h" - -#include <windows.h> -#include <Shlwapi.h> // PathFindFileNameA -#else /* _WIN32 */ -#include <err.h> -#include <libgen.h> // basename - -#include "inst_feunix.h" // InstFrontend -#endif /* _WIN32 */ - -#include <cstring> // strcmp - - -int -main(int argc, char *argv[]) -{ - char *base_name; - bool is_citrun_inst = false; - -#ifdef _WIN32 - // XXX: error checking - base_name = PathFindFileNameA(argv[0]); -#else // _WIN32 - // Protect against argv[0] being an absolute path. - if ((base_name = basename(argv[0])) == NULL) - err(1, "basename"); -#endif // _WIN32 - - // Switch tool mode if we're called as 'citrun_inst'. - if ((std::strcmp(base_name, "citrun_inst") == 0) || - (std::strcmp(base_name, "citrun_inst.exe") == 0)) - is_citrun_inst = true; - - // Always re-search PATH for binary name (in non citrun_inst case). - if (std::strcmp(argv[0], base_name) != 0) - argv[0] = base_name; - -#ifdef _WIN32 - InstFrontendWin32 main(argc, argv, is_citrun_inst); -#else - InstFrontendUnix main(argc, argv, is_citrun_inst); -#endif - - main.log_identity(); - main.get_paths(); - main.clean_PATH(); - main.process_cmdline(); - - main.instrument(); - main.compile_instrumented(); - - return 0; -} diff --git a/bin/inst_visitor.cc b/bin/inst_visitor.cc @@ -1,208 +0,0 @@ -// -// Copyright (c) 2016 Kyle Milz <kyle@0x30.net> -// -// Permission to use, copy, modify, and distribute this software for any -// purpose with or without fee is hereby granted, provided that the above -// copyright notice and this permission notice appear in all copies. 
-// -// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF -// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -// -#include "inst_visitor.h" - -#include <clang/AST/AST.h> -#include <clang/Lex/Lexer.h> -#include <sstream> -#include <string> - - -RewriteASTVisitor::RewriteASTVisitor(clang::Rewriter &R) : - m_TheRewriter(R), - m_SM(R.getSourceMgr()), - m_lopt(R.getLangOpts()), - m_counters(), - m_counter_descr( {{ - "Function definitions", - "If statements", - "For loops", - "While loops", - "Do while loops", - "Switch statements", - "Return statement values", - "Call expressions", - "Total statements", - "Binary operators", - "Errors rewriting source code" - }} ) -{ -} - -bool -RewriteASTVisitor::TraverseStmt(clang::Stmt *s) -{ - if (s == NULL) - return true; - - clang::SourceLocation start_loc = s->getBeginLoc(); - if (m_SM.isInMainFile(start_loc) == false) - return false; - - // Instrumenting statement conditions in macros works perfectly. - // Instrumenting binary operators in macros does not work well. 
- if (clang::Lexer::isAtStartOfMacroExpansion(start_loc, m_SM, m_lopt)) - return false; - - RecursiveASTVisitor<RewriteASTVisitor>::TraverseStmt(s); - return true; -} - -bool -RewriteASTVisitor::TraverseDecl(clang::Decl *d) -{ - if (m_SM.isInMainFile(d->getBeginLoc()) == false) - return false; - - if (clang::isa<clang::VarDecl>(d)) { - clang::VarDecl *vd = clang::cast<clang::VarDecl>(d); - if (vd->hasGlobalStorage()) - return false; - } - if (clang::isa<clang::RecordDecl>(d)) - return false; - if (clang::isa<clang::EnumDecl>(d)) - return false; - - RecursiveASTVisitor<RewriteASTVisitor>::TraverseDecl(d); - return true; -} - -bool -RewriteASTVisitor::VisitVarDecl(clang::VarDecl *d) -{ - return true; -} - -bool -RewriteASTVisitor::VisitStmt(clang::Stmt *s) -{ - ++m_counters[TOTAL_STMT]; - return true; -} - -bool -RewriteASTVisitor::VisitIfStmt(clang::IfStmt *i) -{ - modify_stmt(i->getCond(), m_counters[IF_STMT]); - return true; -} - -bool -RewriteASTVisitor::VisitForStmt(clang::ForStmt *f) -{ - modify_stmt(f->getCond(), m_counters[FOR_STMT]); - return true; -} - -bool -RewriteASTVisitor::VisitWhileStmt(clang::WhileStmt *w) -{ - modify_stmt(w->getCond(), m_counters[WHILE_STMT]); - return true; -} - -bool -RewriteASTVisitor::VisitDoStmt(clang::DoStmt *d) -{ - modify_stmt(d->getCond(), m_counters[DOWHILE_STMT]); - return true; -} - -bool -RewriteASTVisitor::VisitSwitchStmt(clang::SwitchStmt *s) -{ - modify_stmt(s->getCond(), m_counters[SWITCH_STMT]); - return true; -} - -bool -RewriteASTVisitor::VisitReturnStmt(clang::ReturnStmt *r) -{ - modify_stmt(r->getRetValue(), m_counters[RET_STMT_VAL]); - return true; -} - -bool -RewriteASTVisitor::VisitCallExpr(clang::CallExpr *c) -{ - modify_stmt(c, m_counters[CALL_EXPR]); - return true; -} - -bool -RewriteASTVisitor::VisitBinaryOperator(clang::BinaryOperator *b) -{ - // If we can't rewrite the last token, don't even start. 
- if (b->getEndLoc().isMacroID()) - return true; - modify_stmt(b, m_counters[BIN_OPER]); - return true; -} - -bool -RewriteASTVisitor::modify_stmt(clang::Stmt *s, int &counter) -{ - if (s == NULL) - return false; - - std::stringstream ss; - ss << "(++_citrun.data[" - << m_SM.getPresumedLineNumber(s->getBeginLoc()) - 1 - << "], "; - - if (m_TheRewriter.InsertTextBefore(s->getBeginLoc(), ss.str())) { - ++m_counters[REWRITE_ERROR]; - return false; - } - - m_TheRewriter.InsertTextAfter(real_loc_end(s), ")"); - ++counter; - - return true; -} - -bool -RewriteASTVisitor::VisitFunctionDecl(clang::FunctionDecl *f) -{ - // Only function definitions (with bodies), not declarations. - if (f->hasBody() == 0) - return true; - - std::stringstream rewrite_text; - - clang::Stmt *FuncBody = f->getBody(); - clang::SourceLocation curly_brace(FuncBody->getBeginLoc().getLocWithOffset(1)); - - // Animate function calls by firing the entire declaration. - int decl_start = m_SM.getPresumedLineNumber(f->getBeginLoc()); - int decl_end = m_SM.getPresumedLineNumber(curly_brace); - for (int i = decl_start; i <= decl_end; ++i) - rewrite_text << "++_citrun.data[" << i - 1 << "];"; - - // Rewrite the function source right after the beginning curly brace. 
- m_TheRewriter.InsertTextBefore(curly_brace, rewrite_text.str()); - - ++m_counters[FUNC_DEF]; - return true; -} - -clang::SourceLocation -RewriteASTVisitor::real_loc_end(clang::Stmt *d) -{ - clang::SourceLocation _e(d->getEndLoc()); - return clang::Lexer::getLocForEndOfToken(_e, 0, m_SM, m_lopt); -} diff --git a/inst/Makefile b/inst/Makefile @@ -0,0 +1,54 @@ +#CXX = /usr/local/bin/clang++ +CFLAGS += -Wall -W -Wcast-qual -Wwrite-strings +CXXFLAGS += -Wall -W -Wcast-qual \ + -std=c++14 \ + -fno-exceptions \ + -fno-rtti \ + -Wno-unused-parameter \ + -Werror=date-time \ + -fvisibility-inlines-hidden \ + -Wdelete-non-virtual-dtor +CXXFLAGS += -I../lib + +LLVM_CONFIG ?= /usr/local/bin/llvm-config + +#PREFIX != pwd +PREFIX?= ${.CURDIR}/obj + +PROG= citrun_inst + +#all: ${PROG} + +# +# citrun_inst +# +CXXFLAGS_LLVM !!= $(LLVM_CONFIG) --cppflags +LDFLAGS !!= $(LLVM_CONFIG) --ldflags +LLVM_LDLIBS !!= $(LLVM_CONFIG) --libs + +CXXFLAGS += $(CXXFLAGS_LLVM) -DPREFIX=\"$(PREFIX)\" +LDLIBS = -lclangTooling \ + -lclangFrontendTool \ + -lclangFrontend \ + -lclangDriver \ + -lclangSerialization \ + -lclangCodeGen \ + -lclangParse \ + -lclangSema \ + -lclangAnalysis \ + -lclangRewrite \ + -lclangRewriteFrontend \ + -lclangEdit \ + -lclangAST \ + -lclangLex \ + -lclangBasic \ + $(LLVM_LDLIBS) + +SRCS = main.cc \ + fe.cc \ + fe_unix.cc \ + action.cc \ + visitor.cc + +citrun_inst: ${SRCS:.cc=.o} + c++ $(LDFLAGS) -o $@ $(SRCS:cc=o) $(LDLIBS) diff --git a/inst/action.cc b/inst/action.cc @@ -0,0 +1,147 @@ +// +// Copyright (c) 2016 Kyle Milz <kyle@0x30.net> +// +// Permission to use, copy, modify, and distribute this software for any +// purpose with or without fee is hereby granted, provided that the above +// copyright notice and this permission notice appear in all copies. +// +// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +// +#include "action.h" +#include "citrun_h.h" // citrun_h + +#include <clang/Frontend/CompilerInstance.h> +#include <fstream> +#include <sstream> +#include <string> + + +std::unique_ptr<clang::ASTConsumer> +InstrumentAction::CreateASTConsumer(clang::CompilerInstance &CI, clang::StringRef file) +{ + // llvm::errs() << "** Creating AST consumer for: " << file << "\n"; + clang::SourceManager &sm = CI.getSourceManager(); + m_TheRewriter.setSourceMgr(sm, CI.getLangOpts()); + + // Hang onto a reference to this so we can read from it later + m_InstrumentASTConsumer = new RewriteASTConsumer(m_TheRewriter); + return std::unique_ptr<clang::ASTConsumer>(m_InstrumentASTConsumer); +} + +void +InstrumentAction::write_modified_src(clang::FileID const &fid) +{ + std::string out_file(getCurrentFile()); + + std::error_code ec; + llvm::raw_fd_ostream output(out_file, ec, llvm::sys::fs::F_None); + if (ec.value()) { + m_log << "Error writing modified source '" << out_file + << "': " << ec.message() << std::endl; + return; + } + + // Write the instrumented source file + m_TheRewriter.getEditBuffer(fid).write(output); + m_log << "Modified source written successfully." << std::endl; +} + +void +InstrumentAction::EndSourceFileAction() +{ + clang::SourceManager &sm = m_TheRewriter.getSourceMgr(); + const clang::FileID main_fid = sm.getMainFileID(); + + clang::SourceLocation end = sm.getLocForEndOfFile(main_fid); + unsigned int num_lines = sm.getPresumedLineNumber(end); + + // + // Write instrumentation preamble. 
Includes: + // - runtime header + // - per tu citrun_node + // - static constructor for runtime initialization + // + std::ostringstream preamble; + preamble << +R"(#ifdef __cplusplus +extern "C" { +#endif +)"; + preamble << citrun_h; + preamble << "static struct citrun_node _citrun = {\n" + << " " << num_lines << ",\n" + << " \"" << m_compiler_file_name << "\",\n" + << " \"" << getCurrentFile().str() << "\",\n"; + preamble << "};\n"; + +#ifdef _WIN32 + // + // Cribbed from an answer by Joe: + // http://stackoverflow.com/questions/1113409/attribute-constructor-equivalent-in-vc + // + preamble << R"( +#pragma section(".CRT$XCU",read) +#define INITIALIZER2_(f,p) \ + static void f(void); \ + __declspec(allocate(".CRT$XCU")) void (*f##_)(void) = f; \ + __pragma(comment(linker,"/include:" p #f "_")) \ + static void f(void) +#define INITIALIZER(f) INITIALIZER2_(f,"_") +)"; + preamble << "INITIALIZER( init_" + << m_compiler_file_name.substr(0, m_compiler_file_name.find(".")) + << ")" + << R"( +{ + citrun_node_add(citrun_major, citrun_minor, &_citrun); +} +)"; +#else + preamble << R"( +__attribute__((constructor)) static void +citrun_constructor() +{ + citrun_node_add(citrun_major, citrun_minor, &_citrun); +} +)"; +#endif + + preamble << R"( +#ifdef __cplusplus +} +#endif +#line 1 +)"; + + clang::SourceLocation start = sm.getLocForStartOfFile(main_fid); + if (m_is_citruninst) { + std::ofstream preamble_file(getCurrentFile().str() + ".preamble"); + preamble_file << preamble.str(); + preamble_file.close(); + } else if (m_TheRewriter.InsertTextAfter(start, preamble.str())) { + m_log << "Failed to insert the instrumentation preabmle."; + return; + } + + m_log << "Instrumentation of '" << m_compiler_file_name << "' finished:" << std::endl; + m_log << " " << num_lines << " Lines of source code" << std::endl; + + // + // Write out statistics from the AST visitor. 
+ // + RewriteASTVisitor v = m_InstrumentASTConsumer->get_visitor(); + for (int i = 0; i < NCOUNTERS; ++i) { + if (v.m_counters[i] == 0) + continue; + m_log << " " << v.m_counters[i] << " " + << v.m_counter_descr[i] << std::endl; + } + + write_modified_src(main_fid); +} diff --git a/inst/action.h b/inst/action.h @@ -0,0 +1,54 @@ +#include "consumer.h" +#include "log.h" + +#include <clang/Frontend/FrontendActions.h> +#include <clang/Rewrite/Core/Rewriter.h> +#include <clang/Tooling/Tooling.h> + + +// For each source file provided to the tool, a new FrontendAction is created. +class InstrumentAction : public clang::ASTFrontendAction +{ + void write_modified_src(clang::FileID const &); + + clang::Rewriter m_TheRewriter; + RewriteASTConsumer *m_InstrumentASTConsumer; + InstrumentLogger& m_log; + bool m_is_citruninst; + std::string m_compiler_file_name; + +public: + InstrumentAction(InstrumentLogger &log, bool citruninst, + std::string const &filename) : + m_log(log), + m_is_citruninst(citruninst), + m_compiler_file_name(filename) + {}; + + void EndSourceFileAction() override; + std::unique_ptr<clang::ASTConsumer> CreateASTConsumer(clang::CompilerInstance &, clang::StringRef) override; +}; + +// +// Needed because we pass custom stuff down into the ASTFrontendAction +// +class InstrumentActionFactory : public clang::tooling::FrontendActionFactory +{ + InstrumentLogger& m_log; + bool m_is_citruninst; + std::vector<std::string> m_source_files; + int m_i; + +public: + InstrumentActionFactory(InstrumentLogger &log, bool citruninst, std::vector<std::string> const &src_files) : + m_log(log), + m_is_citruninst(citruninst), + m_source_files(src_files), + m_i(0) + {}; + + // clang::ASTFrontendAction *create() { + std::unique_ptr<clang::FrontendAction> create() { + return std::unique_ptr<clang::FrontendAction>(new InstrumentAction(m_log, m_is_citruninst, m_source_files[m_i++])); + } +}; diff --git a/inst/consumer.h b/inst/consumer.h @@ -0,0 +1,27 @@ +#include "visitor.h" + 
+#include <clang/AST/ASTConsumer.h> +#include <clang/Rewrite/Core/Rewriter.h> + + +class RewriteASTConsumer : public clang::ASTConsumer +{ + RewriteASTVisitor Visitor; + +public: + explicit RewriteASTConsumer(clang::Rewriter &R) : + Visitor(R) {} + + // Override the method that gets called for each parsed top-level + // declaration. + virtual bool HandleTopLevelDecl(clang::DeclGroupRef DR) { + for (auto &b : DR) { + // Traverse the declaration using our AST visitor. + Visitor.TraverseDecl(b); + // b->dump(); + } + return true; + } + + RewriteASTVisitor &get_visitor() { return Visitor; }; +}; diff --git a/inst/fe.cc b/inst/fe.cc @@ -0,0 +1,295 @@ +// +// Copyright (c) 2016 Kyle Milz <kyle@0x30.net> +// +// Permission to use, copy, modify, and distribute this software for any +// purpose with or without fee is hereby granted, provided that the above +// copyright notice and this permission notice appear in all copies. +// +// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+// +#include "action.h" // InstrumentActionFactory +#include "fe.h" +#include "citrun.h" // citrun_major, citrun_minor + +#include <clang/Basic/Diagnostic.h> // IgnoringDiagConsumer +#include <clang/Tooling/CommonOptionsParser.h> +#include <clang/Tooling/Tooling.h> +#include <llvm/Support/raw_os_ostream.h> + +#include <algorithm> // std::find_if +#include <cstdio> // tmpnam +#include <cstring> // strcmp +#include <iostream> // std::cerr +#include <sstream> // std::ostringstream + + +static llvm::cl::OptionCategory ToolingCategory("citrun_inst options"); + +InstFrontend::InstFrontend(int argc, char *argv[], bool is_citrun_inst) : + m_start_time(std::chrono::high_resolution_clock::now()), + m_args(argv, argv + argc), + m_is_citruninst(is_citrun_inst), + m_log(is_citrun_inst) +{ +} + +InstFrontend::~InstFrontend() +{ +} + +void +InstFrontend::log_identity() +{ + m_log << ">> citrun_inst v" << citrun_major << "." << citrun_minor; + log_os_str(); + m_log << " called as " << m_args[0] << std::endl; +} + +void +InstFrontend::get_paths() +{ + m_compilers_path = PREFIX ; + m_compilers_path += dir_sep() ; + m_compilers_path += "share" ; + m_compilers_path += dir_sep() ; + m_compilers_path += "citrun" ; + + m_lib_path = PREFIX ; + m_lib_path += dir_sep(); + m_lib_path += lib_name(); + + m_log << "Compilers path = '" << m_compilers_path << "'" << std::endl; +} + +// +// Tries to remove m_compilers_path from PATH otherwise it exits easily. +// +void +InstFrontend::clean_PATH() +{ + if (m_is_citruninst == true) + return; + + char *path; + if ((path = std::getenv("PATH")) == NULL) { + std::cerr << "Error: PATH is not set." << std::endl; + m_log << "Error: PATH is not set." 
<< std::endl; + exit(1); + } + + m_log << "PATH = '" << path << "'" << std::endl; + + // Filter m_compilers_path out of PATH + std::stringstream path_ss(path); + std::string component; + bool first_component = true; + bool found_citrun_path = false; + std::ostringstream new_path; + + while (std::getline(path_ss, component, path_sep())) { + if (component == m_compilers_path) { + found_citrun_path = true; + continue; + } + + if (first_component == false) + new_path << path_sep(); + + // It wasn't m_compilers_path, keep it + new_path << component; + first_component = false; + } + + if (!found_citrun_path) { + // + // This is a really bad situation to be in. We are currently + // executing and can't tell which PATH element we were called + // from. If we exec there's a chance we'll get stuck in an + // infinite exec loop. + // + // Error visibly so this can be fixed as soon as possible. + // + std::stringstream err; + err << "Error: '" << m_compilers_path << "' not in PATH."; + + std::cerr << err.str() << std::endl; + m_log << err.str() << std::endl; + exit(1); + } + + set_path(new_path.str()); +} + +// +// Guess if the argument is a source file. If it is stash a backup of the file +// and sync the timestamps. +// +void +InstFrontend::save_if_srcfile(char *arg) +{ + std::array<std::string, 4> exts = {{ ".c", ".cc", ".cxx", ".cpp" }}; + if (std::find_if(exts.begin(), exts.end(), ends_with(arg)) == exts.end()) + return; + + char *dst_fn; + if ((dst_fn = std::tmpnam(NULL)) == NULL) { + m_log << "tmpnam failed." << std::endl; + return; + } + + m_source_files.push_back(arg); + m_log << "Found source file '" << arg << "'" << std::endl; + + if (m_is_citruninst) + // In this mode the modified source file is written to a + // completely different file. + return; + + copy_file(dst_fn, arg); + m_temp_file_map[arg] = dst_fn; +} + +// +// Walks the entire command line taking action on important arguments. 
+// +void +InstFrontend::process_cmdline() +{ + bool object_arg = false; + bool compile_arg = false; + + // + // Walk every argument one by one looking for preprocessor switches, + // compile mode flags and source files. + // + for (auto &arg : m_args) { + if (std::strcmp(arg, "-E") == 0 || std::strcmp(arg, "-MM") == 0) { + // I don't know the repercussions of doing otherwise. + m_log << "Preprocessor argument " << arg << " found" + << std::endl; + exec_compiler(); + } + else if (std::strcmp(arg, "-o") == 0) + object_arg = true; + else if (std::strcmp(arg, "-c") == 0) + compile_arg = true; +#ifdef _WIN32 + else if (std::strcmp(arg, "/c") == 0) + compile_arg = true; +#endif // _WIN32 + + save_if_srcfile(arg); + } + + if (is_link(object_arg, compile_arg)) { + m_log << "Link detected, adding '"<< m_lib_path + << "' to command line." << std::endl; + m_args.push_back(const_cast<char *>(m_lib_path.c_str())); + } + + m_log << "Command line is '" << m_args[0]; + for (unsigned int i = 1; i < m_args.size(); ++i) + m_log << " " << m_args[i]; + m_log << "'" << std::endl; + + if (m_source_files.size() != 0) + return; + + m_log << "No source files found on command line." << std::endl; + exec_compiler(); +} + +// +// Creates and executes InstrumentAction objects for detected source files. +// +void +InstFrontend::instrument() +{ + // + // Create a special command line for ClangTool that looks like: + // clang++ src1.c src2.c -- clang++ -I. 
-Isrc -c src1.c src2.c + // + std::vector<const char *> clang_argv; + + clang_argv.push_back(m_args[0]); + for (auto &s : m_source_files) + clang_argv.push_back(s.c_str()); + clang_argv.push_back("--"); + clang_argv.insert(clang_argv.end(), m_args.begin(), m_args.end()); +#if defined(__OpenBSD__) + clang_argv.push_back("-I/usr/local/lib/clang/3.8.0/include"); + m_log << "Added clangtool argument '" << clang_argv.back() << "'" << std::endl; +#elif defined(__APPLE__) + clang_argv.push_back("-I/opt/local/libexec/llvm-3.8/lib/clang/3.8.1/include"); + m_log << "Added clangtool argument '" << clang_argv.back() << "'" << std::endl; +#elif defined(WIN32) + clang_argv.push_back(R"(-IC:\Clang\lib\clang\3.9.1\include)"); + m_log << "Added clangtool argument '" << clang_argv.back() << "'" << std::endl; +#endif + + int clang_argc = clang_argv.size(); + clang::tooling::CommonOptionsParser + op(clang_argc, &clang_argv[0], ToolingCategory); + clang::tooling::ClangTool + Tool(op.getCompilations(), op.getSourcePathList()); + + // + // Ignore all errors/warnings by default. + // This makes Tool.run() always return 0 too. + // + Tool.setDiagnosticConsumer(new clang::IgnoringDiagConsumer()); + + std::unique_ptr<InstrumentActionFactory> f = + std::make_unique<InstrumentActionFactory>(m_log, m_is_citruninst, m_source_files); + + // + // Run instrumentation. All source files are processed here. + // + Tool.run(f.get()); + + // All of the time until now is the overhead citrun_inst adds. + std::chrono::high_resolution_clock::time_point now = + std::chrono::high_resolution_clock::now(); + m_log << std::chrono::duration_cast<std::chrono::milliseconds>(now - m_start_time).count() + << " Milliseconds spent rewriting source." << std::endl; + + // This is as far as we go in citrun_inst mode. + if (m_is_citruninst) + exit(0); +} + +// +// Restore source files from stashed backups and sync timestamps. 
+// +void +InstFrontend::restore_original_src() +{ + for (auto &tmp_file : m_temp_file_map) { + m_log << "Restored '" << tmp_file.first << "'" << std::endl; + + copy_file(tmp_file.first, tmp_file.second); + unlink(tmp_file.second.c_str()); + } +} + +void +InstFrontend::compile_instrumented() +{ + int ret; + + ret = fork_compiler(); + m_log << "Rewritten source compile " << (ret ? "failed" : "successful") + << std::endl; + + restore_original_src(); + + if (ret) + // Rewritten compile failed. Run again without modified src. + exec_compiler(); +} diff --git a/inst/fe.h b/inst/fe.h @@ -0,0 +1,68 @@ +// +// Instrument Frontend. +// Takes command lines and instruments source code. +// +#include "log.h" + +#include <chrono> // std::chrono::high_resolution_clock +#include <map> // std::map +#include <string> // std::string + +class InstFrontend +{ + void save_if_srcfile(char *); + void restore_original_src(); + + std::string m_compilers_path; + std::string m_lib_path; + std::chrono::high_resolution_clock::time_point m_start_time; + std::map<std::string, std::string> m_temp_file_map; + + // Implemented by operating system specific classes. + virtual void log_os_str() = 0; + virtual char dir_sep() = 0; + virtual char path_sep() = 0; + virtual std::string lib_name() = 0; + virtual void set_path(std::string const &) = 0; + virtual bool is_link(bool, bool) = 0; + virtual void copy_file(std::string const &, std::string const &) = 0; + virtual void exec_compiler() = 0; + virtual int fork_compiler() = 0; + +protected: + std::vector<char *> m_args; + bool m_is_citruninst; + std::vector<std::string> m_source_files; + InstrumentLogger m_log; + +public: + InstFrontend(int, char *argv[], bool); + virtual ~InstFrontend() = 0; + + void log_identity(); + void get_paths(); + void clean_PATH(); + void process_cmdline(); + void instrument(); + void compile_instrumented(); +}; + +// +// Helper class that is a unary predicate suitable for use with std::find_if. 
+// +class ends_with +{ + std::string arg; +public: + ends_with(char *argument) : + arg(argument) + {} + + bool operator ()(std::string const &suffix) const + { + if (suffix.length() > arg.length()) + return false; + + return std::equal(suffix.rbegin(), suffix.rend(), arg.rbegin()); + } +}; diff --git a/inst/fe_unix.cc b/inst/fe_unix.cc @@ -0,0 +1,167 @@ +// +// Copyright (c) 2016 Kyle Milz <kyle@0x30.net> +// +// Permission to use, copy, modify, and distribute this software for any +// purpose with or without fee is hereby granted, provided that the above +// copyright notice and this permission notice appear in all copies. +// +// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +// +#include "fe_unix.h" + +#include <sys/stat.h> // stat +#include <sys/time.h> // utimes +#include <sys/utsname.h> // uname +#include <sys/wait.h> // waitpid + +#include <err.h> +#include <fstream> // ifstream, ofstream +#include <unistd.h> // execvp, fork, getpid, unlink + + +char +InstFrontendUnix::dir_sep() +{ + return '/'; +} + +char +InstFrontendUnix::path_sep() +{ + return ':'; +} + +std::string +InstFrontendUnix::lib_name() +{ + return "lib/libcitrun.a"; +} + +void +InstFrontendUnix::log_os_str() +{ + struct utsname utsname; + + if (uname(&utsname) == -1) + m_log << " (Unknown OS)"; + else + m_log << " (" << utsname.sysname << "-" << utsname.release + << " " << utsname.machine << ")"; + + // Sometimes we're not called as citrun_inst so force that here. 
+ setprogname("citrun_inst"); +} + +void +InstFrontendUnix::set_path(std::string const &new_path) +{ + if (setenv("PATH", new_path.c_str(), 1)) + err(1, "setenv"); +} + +// +// Copies one file to another preserving timestamps. +// +void +InstFrontendUnix::copy_file(std::string const &dst_fn, std::string const &src_fn) +{ + struct stat sb; + struct timeval st_tim[2]; + + // Save original access and modification times + if (stat(src_fn.c_str(), &sb) < 0) + err(1, "stat"); +#ifdef __APPLE__ + TIMESPEC_TO_TIMEVAL(&st_tim[0], &sb.st_atimespec); + TIMESPEC_TO_TIMEVAL(&st_tim[1], &sb.st_mtimespec); +#else + TIMESPEC_TO_TIMEVAL(&st_tim[0], &sb.st_atim); + TIMESPEC_TO_TIMEVAL(&st_tim[1], &sb.st_mtim); +#endif + + std::ifstream src(src_fn, std::ios::binary); + std::ofstream dst(dst_fn, std::ios::binary); + + dst << src.rdbuf(); + + src.close(); + dst.close(); + + // + // Restore the original access and modification time, it's not critical + // if it fails. + // + if (utimes(dst_fn.c_str(), st_tim) < 0) + warn("utimes"); +} + +bool +InstFrontendUnix::is_link(bool object_arg, bool compile_arg) +{ + if (!object_arg && !compile_arg && m_source_files.size() > 0) + // Assume single line a.out compilation + // $ gcc main.c + return true; + else if (object_arg && !compile_arg) + // gcc -o main main.o fib.o while.o + // gcc -o main main.c fib.c + return true; + + return false; +} + +// +// Execute the compiler by calling execvp(3) on the m_args vector. +// +void +InstFrontendUnix::exec_compiler() +{ + if (m_is_citruninst) { + m_log << "Running as citrun_inst, not calling exec()" << std::endl; + exit(0); + } + + // Null termination explicitly mentioned in execvp(3). + m_args.push_back(NULL); + if (execvp(m_args[0], &m_args[0])) + err(1, "execvp"); +} + +// +// fork(2) then execute the compiler and wait for it to finish. Returns exit +// code of native compiler. 
+// +int +InstFrontendUnix::fork_compiler() +{ + pid_t child_pid; + int status; + int exit = -1; + + if ((child_pid = fork()) < 0) + err(1, "fork"); + + // If in child execute compiler. + if (child_pid == 0) + exec_compiler(); + + m_log << "Forked compiler '" << m_args[0] << "' " + << "pid is '" << child_pid << "'" << std::endl; + + // Wait for the child to finish so we can get its exit code. + if (waitpid(child_pid, &status, 0) < 0) + err(1, "waitpid"); + + // Decode the exit code from status. + if (WIFEXITED(status)) + exit = WEXITSTATUS(status); + + // Return the exit code of the native compiler. + return exit; +} diff --git a/inst/fe_unix.h b/inst/fe_unix.h @@ -0,0 +1,18 @@ +#include "fe.h" + +class InstFrontendUnix : public InstFrontend +{ + // Use InstFrontend's constructor + using InstFrontend::InstFrontend; + + // Mandatory interface implementation. + char dir_sep(); + char path_sep(); + std::string lib_name(); + void log_os_str(); + void set_path(std::string const &); + bool is_link(bool, bool); + void copy_file(std::string const &, std::string const &); + void exec_compiler(); + int fork_compiler(); +}; diff --git a/bin/inst_fewin32.cc b/inst/fewin32.cc diff --git a/bin/inst_fewin32.h b/inst/fewin32.h diff --git a/bin/inst_log.h b/inst/log.h diff --git a/inst/main.cc b/inst/main.cc @@ -0,0 +1,70 @@ +// +// Copyright (c) 2016 Kyle Milz <kyle@0x30.net> +// +// Permission to use, copy, modify, and distribute this software for any +// purpose with or without fee is hereby granted, provided that the above +// copyright notice and this permission notice appear in all copies. +// +// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +// +#ifdef _WIN32 +#include "inst_fewin32.h" + +#include <windows.h> +#include <Shlwapi.h> // PathFindFileNameA +#else /* _WIN32 */ +#include <err.h> +#include <libgen.h> // basename + +#include "fe_unix.h" // InstFrontend +#endif /* _WIN32 */ + +#include <cstring> // strcmp + + +int +main(int argc, char *argv[]) +{ + char *base_name; + bool is_citrun_inst = false; + +#ifdef _WIN32 + // XXX: error checking + base_name = PathFindFileNameA(argv[0]); +#else // _WIN32 + // Protect against argv[0] being an absolute path. + if ((base_name = basename(argv[0])) == NULL) + err(1, "basename"); +#endif // _WIN32 + + // Switch tool mode if we're called as 'citrun_inst'. + if ((std::strcmp(base_name, "citrun_inst") == 0) || + (std::strcmp(base_name, "citrun_inst.exe") == 0)) + is_citrun_inst = true; + + // Always re-search PATH for binary name (in non citrun_inst case). + if (std::strcmp(argv[0], base_name) != 0) + argv[0] = base_name; + +#ifdef _WIN32 + InstFrontendWin32 main(argc, argv, is_citrun_inst); +#else + InstFrontendUnix main(argc, argv, is_citrun_inst); +#endif + + main.log_identity(); + main.get_paths(); + main.clean_PATH(); + main.process_cmdline(); + + main.instrument(); + main.compile_instrumented(); + + return 0; +} diff --git a/inst/visitor.cc b/inst/visitor.cc @@ -0,0 +1,208 @@ +// +// Copyright (c) 2016 Kyle Milz <kyle@0x30.net> +// +// Permission to use, copy, modify, and distribute this software for any +// purpose with or without fee is hereby granted, provided that the above +// copyright notice and this permission notice appear in all copies. 
+// +// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +// +#include "visitor.h" + +#include <clang/AST/AST.h> +#include <clang/Lex/Lexer.h> +#include <sstream> +#include <string> + + +RewriteASTVisitor::RewriteASTVisitor(clang::Rewriter &R) : + m_TheRewriter(R), + m_SM(R.getSourceMgr()), + m_lopt(R.getLangOpts()), + m_counters(), + m_counter_descr( {{ + "Function definitions", + "If statements", + "For loops", + "While loops", + "Do while loops", + "Switch statements", + "Return statement values", + "Call expressions", + "Total statements", + "Binary operators", + "Errors rewriting source code" + }} ) +{ +} + +bool +RewriteASTVisitor::TraverseStmt(clang::Stmt *s) +{ + if (s == NULL) + return true; + + clang::SourceLocation start_loc = s->getBeginLoc(); + if (m_SM.isInMainFile(start_loc) == false) + return false; + + // Instrumenting statement conditions in macros works perfectly. + // Instrumenting binary operators in macros does not work well. 
+ if (clang::Lexer::isAtStartOfMacroExpansion(start_loc, m_SM, m_lopt)) + return false; + + RecursiveASTVisitor<RewriteASTVisitor>::TraverseStmt(s); + return true; +} + +bool +RewriteASTVisitor::TraverseDecl(clang::Decl *d) +{ + if (m_SM.isInMainFile(d->getBeginLoc()) == false) + return false; + + if (clang::isa<clang::VarDecl>(d)) { + clang::VarDecl *vd = clang::cast<clang::VarDecl>(d); + if (vd->hasGlobalStorage()) + return false; + } + if (clang::isa<clang::RecordDecl>(d)) + return false; + if (clang::isa<clang::EnumDecl>(d)) + return false; + + RecursiveASTVisitor<RewriteASTVisitor>::TraverseDecl(d); + return true; +} + +bool +RewriteASTVisitor::VisitVarDecl(clang::VarDecl *d) +{ + return true; +} + +bool +RewriteASTVisitor::VisitStmt(clang::Stmt *s) +{ + ++m_counters[TOTAL_STMT]; + return true; +} + +bool +RewriteASTVisitor::VisitIfStmt(clang::IfStmt *i) +{ + modify_stmt(i->getCond(), m_counters[IF_STMT]); + return true; +} + +bool +RewriteASTVisitor::VisitForStmt(clang::ForStmt *f) +{ + modify_stmt(f->getCond(), m_counters[FOR_STMT]); + return true; +} + +bool +RewriteASTVisitor::VisitWhileStmt(clang::WhileStmt *w) +{ + modify_stmt(w->getCond(), m_counters[WHILE_STMT]); + return true; +} + +bool +RewriteASTVisitor::VisitDoStmt(clang::DoStmt *d) +{ + modify_stmt(d->getCond(), m_counters[DOWHILE_STMT]); + return true; +} + +bool +RewriteASTVisitor::VisitSwitchStmt(clang::SwitchStmt *s) +{ + modify_stmt(s->getCond(), m_counters[SWITCH_STMT]); + return true; +} + +bool +RewriteASTVisitor::VisitReturnStmt(clang::ReturnStmt *r) +{ + modify_stmt(r->getRetValue(), m_counters[RET_STMT_VAL]); + return true; +} + +bool +RewriteASTVisitor::VisitCallExpr(clang::CallExpr *c) +{ + modify_stmt(c, m_counters[CALL_EXPR]); + return true; +} + +bool +RewriteASTVisitor::VisitBinaryOperator(clang::BinaryOperator *b) +{ + // If we can't rewrite the last token, don't even start. 
+ if (b->getEndLoc().isMacroID()) + return true; + modify_stmt(b, m_counters[BIN_OPER]); + return true; +} + +bool +RewriteASTVisitor::modify_stmt(clang::Stmt *s, int &counter) +{ + if (s == NULL) + return false; + + std::stringstream ss; + ss << "(++_citrun.data[" + << m_SM.getPresumedLineNumber(s->getBeginLoc()) - 1 + << "], "; + + if (m_TheRewriter.InsertTextBefore(s->getBeginLoc(), ss.str())) { + ++m_counters[REWRITE_ERROR]; + return false; + } + + m_TheRewriter.InsertTextAfter(real_loc_end(s), ")"); + ++counter; + + return true; +} + +bool +RewriteASTVisitor::VisitFunctionDecl(clang::FunctionDecl *f) +{ + // Only function definitions (with bodies), not declarations. + if (f->hasBody() == 0) + return true; + + std::stringstream rewrite_text; + + clang::Stmt *FuncBody = f->getBody(); + clang::SourceLocation curly_brace(FuncBody->getBeginLoc().getLocWithOffset(1)); + + // Animate function calls by firing the entire declaration. + int decl_start = m_SM.getPresumedLineNumber(f->getBeginLoc()); + int decl_end = m_SM.getPresumedLineNumber(curly_brace); + for (int i = decl_start; i <= decl_end; ++i) + rewrite_text << "++_citrun.data[" << i - 1 << "];"; + + // Rewrite the function source right after the beginning curly brace. + m_TheRewriter.InsertTextBefore(curly_brace, rewrite_text.str()); + + ++m_counters[FUNC_DEF]; + return true; +} + +clang::SourceLocation +RewriteASTVisitor::real_loc_end(clang::Stmt *d) +{ + clang::SourceLocation _e(d->getEndLoc()); + return clang::Lexer::getLocForEndOfToken(_e, 0, m_SM, m_lopt); +} diff --git a/bin/inst_visitor.h b/inst/visitor.h