diff --git a/include/cppast/compile_config.hpp b/include/cppast/compile_config.hpp index cfc668e..5db32fd 100644 --- a/include/cppast/compile_config.hpp +++ b/include/cppast/compile_config.hpp @@ -56,7 +56,7 @@ namespace cppast { public: /// \effects Sets the given C++ standard and compilation flags. - void set_standard(cpp_standard standard, type_safe::flag_set flags) + void set_flags(cpp_standard standard, type_safe::flag_set flags = {}) { do_set_flags(standard, flags); } @@ -79,6 +79,13 @@ namespace cppast do_remove_macro_definition(std::move(name)); } + /// \returns A unique name of the configuration. + /// \notes This allows detecting mismatches of configurations and parsers. + const char* name() const noexcept + { + return do_get_name(); + } + protected: compile_config(std::vector def_flags) : flags_(std::move(def_flags)) { @@ -110,6 +117,10 @@ namespace cppast /// \effects Undefines the given macro. virtual void do_remove_macro_definition(std::string name) = 0; + /// \returns A unique name of the configuration. + /// \notes This allows detecting mismatches of configurations and parsers. + virtual const char* do_get_name() const noexcept = 0; + std::vector flags_; }; } // namespace cppast diff --git a/include/cppast/libclang_parser.hpp b/include/cppast/libclang_parser.hpp index df56294..78d8697 100644 --- a/include/cppast/libclang_parser.hpp +++ b/include/cppast/libclang_parser.hpp @@ -9,12 +9,31 @@ namespace cppast { + class libclang_compile_config; + + namespace detail + { + struct libclang_compile_config_access + { + static const std::string& clang_binary(const libclang_compile_config& config); + + static const std::vector& flags(const libclang_compile_config& config); + }; + } // namespace detail + /// Compilation config for the [cppast::libclang_parser](). class libclang_compile_config final : public compile_config { public: libclang_compile_config(); + /// \effects Sets the path to the location of the `clang++` binary. + /// \notes It will be used for preprocessing. + void set_clang_binary(std::string binary) + { + clang_binary_ = std::move(binary); + } + private: void do_set_flags(cpp_standard standard, type_safe::flag_set flags) override; @@ -23,6 +42,15 @@ namespace cppast void do_add_macro_definition(std::string name, std::string definition) override; void do_remove_macro_definition(std::string name) override; + + const char* do_get_name() const noexcept override + { + return "libclang"; + } + + std::string clang_binary_; + + friend detail::libclang_compile_config_access; }; /// A parser that uses libclang. @@ -33,7 +61,7 @@ namespace cppast ~libclang_parser() noexcept override; private: - std::unique_ptr do_parse(const cpp_entity_index& idx, const std::string& path, + std::unique_ptr do_parse(const cpp_entity_index& idx, std::string path, const compile_config& config) const override; struct impl; diff --git a/include/cppast/parser.hpp b/include/cppast/parser.hpp index 2c6803d..68b6902 100644 --- a/include/cppast/parser.hpp +++ b/include/cppast/parser.hpp @@ -25,10 +25,11 @@ namespace cppast /// \effects Parses the given file. /// \returns The [cppast::cpp_file]() object describing it. - std::unique_ptr parse(const cpp_entity_index& idx, const std::string& path, + /// \requires The dynamic type of `config` must match the required config type. + std::unique_ptr parse(const cpp_entity_index& idx, std::string path, const compile_config& config) const { - return do_parse(idx, path, config); + return do_parse(idx, std::move(path), config); } protected: @@ -37,9 +38,8 @@ namespace cppast private: /// \effects Parses the given file. /// \returns The [cppast::cpp_file]() object describing it. - virtual std::unique_ptr do_parse(const cpp_entity_index& idx, - const std::string& path, - const compile_config& config) const = 0; + virtual std::unique_ptr do_parse(const cpp_entity_index& idx, std::string path, + const compile_config& config) const = 0; }; } // namespace cppast diff --git a/src/libclang/libclang_parser.cpp b/src/libclang/libclang_parser.cpp index d031815..0ef6ec3 100644 --- a/src/libclang/libclang_parser.cpp +++ b/src/libclang/libclang_parser.cpp @@ -4,12 +4,28 @@ #include +#include + #include "raii_wrapper.hpp" +#include "preprocessor.hpp" using namespace cppast; +const std::string& detail::libclang_compile_config_access::clang_binary( + const libclang_compile_config& config) +{ + return config.clang_binary_; +} + +const std::vector& detail::libclang_compile_config_access::flags( + const libclang_compile_config& config) +{ + return config.get_flags(); +} + libclang_compile_config::libclang_compile_config() : compile_config({}) { + set_clang_binary("clang++"); } void libclang_compile_config::do_set_flags(cpp_standard standard, @@ -84,9 +100,18 @@ libclang_parser::~libclang_parser() noexcept { } -std::unique_ptr libclang_parser::do_parse(const cpp_entity_index& idx, - const std::string& path, - const compile_config& config) const +std::unique_ptr libclang_parser::do_parse(const cpp_entity_index& idx, std::string path, + const compile_config& c) const { - return nullptr; + DEBUG_ASSERT(std::strcmp(c.name(), "libclang") == 0, detail::precondition_error_handler{}, + "config has mismatched type"); + auto& config = static_cast(c); + auto preprocessed = detail::preprocess(config, path.c_str()); + + cpp_file::builder builder(std::move(path)); + + for (auto& e : preprocessed.entities) + builder.add_child(std::move(e.entity)); + + return builder.finish(idx); } diff --git a/src/libclang/preprocessor.cpp b/src/libclang/preprocessor.cpp new file mode 100644 index 0000000..8ce8edb --- /dev/null +++ b/src/libclang/preprocessor.cpp @@ -0,0 +1,413 @@ +// Copyright (C) 2017 Jonathan Müller +// This file is subject to the license terms in the LICENSE file +// found in the top-level directory of this distribution. + +#include "preprocessor.hpp" + +#include +#include +#include +#include + +// treat the tiny-process-library as header only +#include +#include +#if defined(WIN32) || defined(_WIN32) || defined(__WIN32) +#include +#else +#include +#endif + +#include +#include +#include + +#include + +using namespace cppast; +namespace ts = type_safe; + +namespace +{ + // build the command that runs the preprocessor + std::string get_command(const libclang_compile_config& c, const char* full_path) + { + // -E: print preprocessor output + // -CC: keep comments, even in macro + // -dD: print macro definitions as well + // -Wno-pragma-once-outside-header: hide wrong warning + std::string cmd(detail::libclang_compile_config_access::clang_binary(c) + + " -E -CC -dD -Wno-pragma-once-outside-header "); + + // add other flags + for (auto& flag : detail::libclang_compile_config_access::flags(c)) + { + cmd += flag; + cmd += ' '; + } + + // add path to file being processed + cmd += full_path; + + return cmd; + } + + // gets the full preprocessor output + std::string get_full_preprocess_output(const libclang_compile_config& c, const char* full_path) + { + std::string preprocessed; + + auto cmd = get_command(c, full_path); + Process process(cmd, "", + [&](const char* str, std::size_t n) { + preprocessed.reserve(preprocessed.size() + n); + for (auto end = str + n; str != end; ++str) + if (*str != '\r') + preprocessed.push_back(*str); + }, + [&](const char* str, std::size_t n) { + std::fprintf(stderr, "%.*s\n", static_cast(n), + str); // TODO: log error properly + }); + + auto exit_code = process.get_exit_status(); + if (exit_code != 0) + DEBUG_UNREACHABLE(detail::assert_handler{}); // TODO: improve error handling + + return preprocessed; + } + + class position + { + public: + position(ts::object_ref result, const char* ptr) noexcept + : result_(result), cur_line_(1u), ptr_(ptr), write_(true) + { + } + + void write_str(std::string str) + { + if (write_ == false) + return; + for (auto c : str) + { + *result_ += c; + if (c == '\n') + ++cur_line_; + } + } + + void bump() noexcept + { + if (write_ == true) + { + result_->push_back(*ptr_); + if (*ptr_ == '\n') + ++cur_line_; + } + ++ptr_; + } + + void bump(std::size_t offset) noexcept + { + for (std::size_t i = 0u; i != offset; ++i) + bump(); + } + + // no write, no newline detection + void skip(std::size_t offset = 1u) noexcept + { + ptr_ += offset; + } + + void enable_write() noexcept + { + write_.set(); + } + + void disable_write() noexcept + { + write_.try_reset(); + } + + explicit operator bool() const noexcept + { + return *ptr_ != '\0'; + } + + const char* ptr() const noexcept + { + return ptr_; + } + + unsigned cur_line() const noexcept + { + return cur_line_; + } + + bool was_newl() const noexcept + { + return result_->empty() || result_->back() == '\n'; + } + + private: + ts::object_ref result_; + unsigned cur_line_; + const char* ptr_; + ts::flag write_; + }; + + bool starts_with(const position& p, const char* str) + { + return std::strncmp(p.ptr(), str, std::strlen(str)) == 0; + } + + bool bump_c_str(position& p) + { + if (!starts_with(p, "/*")) + return false; + p.bump(2u); + + while (!starts_with(p, "*/")) + p.bump(); + p.bump(2u); + return true; + } + + bool bump_cpp_str(position& p) + { + if (!starts_with(p, "//")) + return false; + p.bump(2u); + + while (!starts_with(p, "\n")) + p.bump(); + return true; + } + + void skip_spaces(position& p) + { + while (starts_with(p, " ")) + p.skip(); + } + + std::unique_ptr parse_macro(position& p) + { + // format (at new line): #define [replacement] + // or: #define () [replacement] + if (!p.was_newl() || !starts_with(p, "#define")) + return nullptr; + p.skip(std::strlen("#define")); + skip_spaces(p); + + std::string name; + while (!starts_with(p, "(") && !starts_with(p, " ") && !starts_with(p, "\n")) + { + name += *p.ptr(); + p.skip(); + } + + ts::optional args; + if (starts_with(p, "(")) + { + std::string str; + for (p.skip(); !starts_with(p, ")"); p.skip()) + str += *p.ptr(); + p.skip(); + args = std::move(str); + } + + std::string rep; + for (skip_spaces(p); !starts_with(p, "\n"); p.skip()) + rep += *p.ptr(); + // don't skip newline + + return cpp_macro_definition::build(std::move(name), std::move(args), std::move(rep)); + } + + ts::optional parse_undef(position& p) + { + // format (at new line): #undef + if (!p.was_newl() || !starts_with(p, "#undef")) + return ts::nullopt; + p.skip(std::strlen("#undef")); + + std::string result; + for (skip_spaces(p); !starts_with(p, "\n"); p.skip()) + result += *p.ptr(); + // don't skip newline + + return result; + } + + bool skip_pragma(position& p) + { + // format (at new line): #pragma \n + if (!p.was_newl() || !starts_with(p, "#pragma")) + return false; + + while (!starts_with(p, "\n")) + p.skip(); + // don't skip newline + + return true; + } + + struct linemarker + { + std::string file; + unsigned line; + enum + { + line_directive, // no change in file + enter_new, // open a new file + enter_old, // return to an old file + } flag = line_directive; + bool is_system = false; + }; + + ts::optional parse_linemarker(position& p) + { + // format (at new line): # "" + // flag 1: enter_new + // flag 2: enter_old + // flag 3: system file + // flag 4: ignored + if (!p.was_newl() || !starts_with(p, "#")) + return ts::nullopt; + p.skip(); + DEBUG_ASSERT(!starts_with(p, "define") && !starts_with(p, "undef") + && !starts_with(p, "pragma"), + detail::assert_handler{}, "handle macros first"); + + linemarker result; + + std::string line; + for (skip_spaces(p); std::isdigit(*p.ptr()); p.skip()) + line += *p.ptr(); + result.line = unsigned(std::stoi(line)); + + skip_spaces(p); + DEBUG_ASSERT(*p.ptr() == '"', detail::assert_handler{}); + p.skip(); + + std::string file_name; + for (; !starts_with(p, "\""); p.skip()) + file_name += *p.ptr(); + p.skip(); + result.file = std::move(file_name); + + for (; !starts_with(p, "\n"); p.skip()) + { + skip_spaces(p); + + switch (*p.ptr()) + { + case '1': + DEBUG_ASSERT(result.flag == linemarker::line_directive, detail::assert_handler{}); + result.flag = linemarker::enter_new; + break; + case '2': + DEBUG_ASSERT(result.flag == linemarker::line_directive, detail::assert_handler{}); + result.flag = linemarker::enter_old; + break; + case '3': + result.is_system = true; + break; + case '4': + break; // ignored + + default: + DEBUG_UNREACHABLE(detail::assert_handler{}, "invalid line marker"); + break; + } + } + p.skip(); + + return result; + } +} + +detail::preprocessor_output detail::preprocess(const libclang_compile_config& config, + const char* path) +{ + detail::preprocessor_output result; + + auto output = get_full_preprocess_output(config, path); + + position p(ts::ref(result.source), output.c_str()); + std::size_t file_depth = 0u; + while (p) + { + if (auto macro = parse_macro(p)) + { + if (file_depth == 0u) + result.entities.push_back({std::move(macro), p.cur_line()}); + } + else if (auto undef = parse_undef(p)) + { + if (file_depth == 0u) + result.entities + .erase(std::remove_if(result.entities.begin(), result.entities.end(), + [&](const pp_entity& e) { + return e.entity->kind() + == cpp_entity_kind::macro_definition_t + && e.entity->name() == undef.value(); + }), + result.entities.end()); + } + else if (skip_pragma(p)) + continue; + else if (auto lm = parse_linemarker(p)) + { + switch (lm.value().flag) + { + case linemarker::line_directive: + break; // ignore + // no need to handle it, preprocessed output doesn't need to match line numbers precisely + + case linemarker::enter_new: + if (file_depth == 0u && lm.value().file.front() != '<') + { + // this file is directly included by the given file + // so build entity (first, the write updates the line count) + result.entities.push_back( + {cpp_include_directive::build(cpp_file_ref(cpp_entity_id(lm.value().file), + lm.value().file), + // not really correct, but nice approximation + lm.value().is_system ? + cpp_include_kind::system : + cpp_include_kind::local), + p.cur_line()}); + // but also write the include directive again + p.write_str("#include \"" + lm.value().file + "\"\n"); + } + + ++file_depth; + p.disable_write(); + break; + + case linemarker::enter_old: + --file_depth; + if (file_depth == 0u) + { + DEBUG_ASSERT(lm.value().file == path, detail::assert_handler{}); + p.enable_write(); + } + break; + } + } + else if (bump_c_str(p)) + // write an additional newline after each string + // this allows matching documentation comments to entities generated from macros + // as the entity corresponding to the documentation comment will be on the next line + // otherwise all entities would have the same line number + p.write_str("\n"); + else if (bump_cpp_str(p)) + continue; + else + p.bump(); + } + + return result; +} diff --git a/src/libclang/preprocessor.hpp b/src/libclang/preprocessor.hpp new file mode 100644 index 0000000..8bc268c --- /dev/null +++ b/src/libclang/preprocessor.hpp @@ -0,0 +1,31 @@ +// Copyright (C) 2017 Jonathan Müller +// This file is subject to the license terms in the LICENSE file +// found in the top-level directory of this distribution. + +#ifndef CPPAST_PREPROCESSOR_HPP_INCLUDED +#define CPPAST_PREPROCESSOR_HPP_INCLUDED + +#include +#include + +namespace cppast +{ + namespace detail + { + struct pp_entity + { + std::unique_ptr entity; + unsigned line; + }; + + struct preprocessor_output + { + std::string source; + std::vector entities; + }; + + preprocessor_output preprocess(const libclang_compile_config& config, const char* path); + } +} // namespace cppast::detail + +#endif // CPPAST_PREPROCESSOR_HPP_INCLUDED