Add simple tokenization function

This commit is contained in:
Jonathan Müller 2017-10-21 12:45:44 +02:00
commit 3b9d410574
28 changed files with 919 additions and 203 deletions

View file

@ -15,12 +15,13 @@ namespace cppast
/// The kinds of C++ tokens.
enum class cpp_token_kind
{
identifier, //< Any identifier.
keyword, //< Any keyword.
literal, //< Any literal.
punctuation, //< Any other punctuation.
unknown, //< An unknown token.
identifier, //< Any identifier.
keyword, //< Any keyword.
int_literal, //< An integer literal.
float_literal, //< A floating point literal.
char_literal, //< A character literal.
string_literal, //< A string literal.
punctuation //< Any other punctuation.
};
/// A C++ token.
@ -74,16 +75,15 @@ namespace cppast
std::vector<cpp_token> tokens_;
};
/// Tokenizes a string.
/// \effects Splits the string into C++ tokens.
/// The string must contain valid tokens and must already be preprocessed (i.e. translation phase 6 is already done).
/// \returns The tokenized string.
static cpp_token_string tokenize(std::string str);
/// \effects Creates it from a sequence of tokens.
cpp_token_string(std::vector<cpp_token> tokens) : tokens_(std::move(tokens)) {}
/// \effects Creates from a string.
/// \notes This does not do tokenization, it will only store a single, unknown token!
static cpp_token_string from_string(std::string str)
{
return cpp_token_string({cpp_token(cpp_token_kind::unknown, std::move(str))});
}
/// \exclude target
using iterator = std::vector<cpp_token>::const_iterator;

View file

@ -95,8 +95,8 @@ set(libclang_source
libclang/preprocessor.hpp
libclang/raii_wrapper.hpp
libclang/template_parser.cpp
libclang/tokenizer.cpp
libclang/tokenizer.hpp
libclang/cxtokenizer.cpp
libclang/cxtokenizer.hpp
libclang/type_parser.cpp
libclang/variable_parser.cpp)

View file

@ -1158,7 +1158,7 @@ void detail::write_template_arguments(
void detail::write_token_string(code_generator::output& output, const cpp_token_string& tokens)
{
auto last_kind = cpp_token_kind::unknown;
auto last_kind = cpp_token_kind::punctuation; // neutral regarding whitespace
for (auto& token : tokens)
{
switch (token.kind)
@ -1177,14 +1177,15 @@ void detail::write_token_string(code_generator::output& output, const cpp_token_
output << operator_ws;
break;
case cpp_token_kind::literal:
// determine kind of literal
if (token.spelling.front() == '\"')
output << string_literal(token.spelling);
else if (token.spelling.find('.') != std::string::npos)
output << float_literal(token.spelling);
else
output << int_literal(token.spelling);
case cpp_token_kind::int_literal:
output << int_literal(token.spelling);
break;
case cpp_token_kind::float_literal:
output << float_literal(token.spelling);
break;
case cpp_token_kind::char_literal:
case cpp_token_kind::string_literal:
output << string_literal(token.spelling);
break;
case cpp_token_kind::punctuation:
@ -1206,9 +1207,6 @@ void detail::write_token_string(code_generator::output& output, const cpp_token_
else
output << punctuation(token.spelling);
break;
case cpp_token_kind::unknown:
output << token_seq(token.spelling);
}
last_kind = token.kind;

View file

@ -6,6 +6,9 @@
#include <algorithm>
#include <cctype>
#include <cstring>
#include <type_safe/optional.hpp>
#include <cppast/detail/assert.hpp>
using namespace cppast;
@ -16,6 +19,586 @@ void cpp_token_string::builder::unmunch()
tokens_.back().spelling = ">";
}
namespace
{
// Whether `haystack` begins with the string literal `prefix`.
template <std::size_t N>
bool starts_with(const char* haystack, const char (&prefix)[N])
{
    // N counts the literal's terminating null byte, so compare N - 1 chars.
    return std::strncmp(haystack, prefix, N - 1u) == 0;
}
// Whether `haystack` begins with the contents of `prefix`.
bool starts_with(const char* haystack, const std::string& prefix)
{
    return std::strncmp(haystack, prefix.c_str(), prefix.size()) == 0;
}
// Consumes the string literal `str` from `ptr` if it is a prefix.
// \returns true (and advances `ptr` past it) on a match, false otherwise.
template <std::size_t N>
bool bump_if(const char*& ptr, const char (&str)[N])
{
    if (!starts_with(ptr, str))
        return false;
    ptr += N - 1; // N counts the null terminator
    return true;
}
// Consumes `str` from `ptr` if it is a prefix; returns whether it did.
bool bump_if(const char*& ptr, const std::string& str)
{
    if (!starts_with(ptr, str))
        return false;
    ptr += str.size();
    return true;
}
// Whether `c` may start a C++ identifier.
// Assumes ASCII; universal-character-names (\uXXXX) are deliberately
// not supported, as they are essentially never used in practice.
bool is_identifier_nondigit(char c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_';
}
// Whether `c` is a decimal digit (ASCII).
bool is_digit(char c)
{
    return '0' <= c && c <= '9';
}
// Whether `c` is a hexadecimal digit (ASCII, either case).
bool is_hexadecimal_digit(char c)
{
    return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
}
// Consumes an identifier ([A-Za-z_][A-Za-z0-9_]*) from `ptr`.
// \returns its spelling, or nullopt (ptr unchanged) if `ptr` does not
// start an identifier.
type_safe::optional<std::string> bump_identifier(const char*& ptr)
{
    if (!is_identifier_nondigit(*ptr))
        return type_safe::nullopt;

    std::string spelling;
    do
        spelling += *ptr++;
    while (is_identifier_nondigit(*ptr) || is_digit(*ptr));
    return spelling;
}
// Consumes an identifier and classifies it:
// a keyword token, an alternative operator representation (translated to the
// primary spelling as a punctuation token), or a plain identifier token.
// \returns nullopt if `ptr` does not start an identifier.
type_safe::optional<cpp_token> identifier_token(const char*& ptr)
{
auto identifier = bump_identifier(ptr);
if (!identifier)
return type_safe::nullopt;
static constexpr const char* keywords[] = {"alignas",
"alignof",
"asm",
"auto",
"bool",
"break",
"case",
"catch",
"char",
"char16_t",
"char32_t",
"class",
"const",
"constexpr",
"const_cast",
"continue",
"decltype",
"default",
"delete",
"do",
"double",
"dynamic_cast",
"else",
"enum",
"explicit",
"export",
"extern",
"false",
"float",
"for",
"friend",
"goto",
"if",
"inline",
"int",
"long",
"mutable",
"namespace",
"new",
"noexcept",
"nullptr",
"operator",
"private",
"protected",
"public",
"register",
"reinterpret_cast",
"return",
"short",
"signed",
"sizeof",
"static",
"static_assert",
"static_cast",
"struct",
"switch",
"template",
"this",
"thread_local",
"throw",
"true",
"try",
"typedef",
"typeid",
"typename",
"union",
"unsigned",
"using",
"virtual",
"void",
"volatile",
"wchar_t",
"while"};
// linear search; note that the array is not strictly sorted
// ("constexpr" precedes "const_cast"), so binary search must not be used
auto find_keyword = std::find(std::begin(keywords), std::end(keywords), identifier.value());
if (find_keyword != std::end(keywords))
return cpp_token(cpp_token_kind::keyword, identifier.value());
// alternative operator representations are normalized to the primary spelling
else if (identifier == "and")
return cpp_token(cpp_token_kind::punctuation, "&&");
else if (identifier == "and_eq")
return cpp_token(cpp_token_kind::punctuation, "&=");
else if (identifier == "bitand")
return cpp_token(cpp_token_kind::punctuation, "&");
else if (identifier == "bitor")
return cpp_token(cpp_token_kind::punctuation, "|");
else if (identifier == "compl")
return cpp_token(cpp_token_kind::punctuation, "~");
else if (identifier == "not")
return cpp_token(cpp_token_kind::punctuation, "!");
else if (identifier == "not_eq")
return cpp_token(cpp_token_kind::punctuation, "!=");
else if (identifier == "or")
return cpp_token(cpp_token_kind::punctuation, "||");
else if (identifier == "or_eq")
return cpp_token(cpp_token_kind::punctuation, "|=");
else if (identifier == "xor")
return cpp_token(cpp_token_kind::punctuation, "^");
else if (identifier == "xor_eq")
return cpp_token(cpp_token_kind::punctuation, "^=");
else
return cpp_token(cpp_token_kind::identifier, identifier.value());
}
// Appends a user-defined literal suffix to `literal`, if one follows.
// A UDL suffix is simply an identifier glued onto the literal.
void append_udl_suffix(std::string& literal, const char*& ptr)
{
    auto suffix = identifier_token(ptr);
    if (suffix)
        literal += suffix.value().spelling;
}
// Consumes a digit sequence from `ptr`, where `is_digit` decides what counts
// as a digit. Digit separators (') are consumed but dropped from the result.
// \returns the digits without separators (may be empty if `ptr` does not
// start with a digit).
template <typename DigitPredicate>
std::string parse_digit_sequence(const char*& ptr, DigitPredicate is_digit)
{
    std::string result;
    auto        last = '\0'; // last character consumed, '\0' if none
    for (; is_digit(*ptr) || *ptr == '\''; ++ptr)
    {
        last = *ptr;
        if (*ptr != '\'')
            result += *ptr;
    }
    // a digit separator must not be trailing;
    // fix: the previous check `result.back() != '\''` was vacuous (separators
    // are never appended) and undefined behavior for an empty sequence
    DEBUG_ASSERT(last != '\'', detail::assert_handler{});
    return result;
}
void append_integer_suffix(std::string& literal, const char*& ptr)
{
auto append_unsigned_suffix = [](std::string& literal, const char*& ptr) {
if (*ptr == 'u' || *ptr == 'U')
{
literal += *ptr++;
return true;
}
else
return false;
};
auto append_long_suffix = [](std::string& literal, const char*& ptr) {
if (starts_with(ptr, "ll") || starts_with(ptr, "LL"))
{
literal += *ptr++;
literal += *ptr++;
return true;
}
else if (*ptr == 'l' || *ptr == 'L')
{
literal += *ptr++;
return true;
}
else
return false;
};
if (append_unsigned_suffix(literal, ptr))
append_long_suffix(literal, ptr);
else if (append_long_suffix(literal, ptr))
append_unsigned_suffix(literal, ptr);
else
append_udl_suffix(literal, ptr);
}
// Appends a floating point literal suffix (f/F/l/L) to `literal`, if one
// follows; otherwise tries a user-defined literal suffix.
void append_floating_point_suffix(std::string& literal, const char*& ptr)
{
    switch (*ptr)
    {
    case 'f':
    case 'F':
    case 'l':
    case 'L':
        literal += *ptr++;
        break;
    default:
        append_udl_suffix(literal, ptr);
        break;
    }
}
// Consumes a floating point exponent from `ptr`:
// e/E for decimal literals, p/P for hexadecimal ones,
// followed by an optional sign and a decimal digit sequence.
// \returns the exponent spelling, or nullopt if none follows.
type_safe::optional<std::string> parse_floating_point_exponent(const char*& ptr)
{
    if (*ptr != 'e' && *ptr != 'E' && *ptr != 'p' && *ptr != 'P')
        return type_safe::nullopt;

    std::string exponent;
    exponent += *ptr++;
    if (*ptr == '+' || *ptr == '-')
        exponent += *ptr++;
    exponent += parse_digit_sequence(ptr, &is_digit);
    return exponent;
}
// Consumes an integer or floating point literal from `ptr`, including base
// prefix, digit separators and any suffix.
// \returns the literal token, or nullopt if `ptr` does not start a number.
type_safe::optional<cpp_token> numeric_literal_token(const char*& ptr)
{
    if (starts_with(ptr, "0b") || starts_with(ptr, "0B")) // binary integer literal
    {
        std::string result;
        // base prefix
        result += *ptr++;
        result += *ptr++;
        result += parse_digit_sequence(ptr, [](char c) { return c == '0' || c == '1'; });
        append_integer_suffix(result, ptr);
        return cpp_token(cpp_token_kind::int_literal, result);
    }
    else if (starts_with(ptr, "0x") || starts_with(ptr, "0X")) // hexadecimal literal
    {
        std::string result;
        // base prefix
        result += *ptr++;
        result += *ptr++;
        result += parse_digit_sequence(ptr, &is_hexadecimal_digit);
        auto is_float = false;
        if (*ptr == '.')
        {
            // hexadecimal fraction
            is_float = true;
            result += *ptr++;
            result += parse_digit_sequence(ptr, &is_hexadecimal_digit);
        }
        if (auto exp = parse_floating_point_exponent(ptr))
        {
            // p/P exponent
            is_float = true;
            result += exp.value();
        }
        if (is_float)
            append_floating_point_suffix(result, ptr);
        else
            append_integer_suffix(result, ptr);
        return cpp_token(is_float ? cpp_token_kind::float_literal : cpp_token_kind::int_literal,
                         result);
    }
    else if (is_digit(*ptr)) // octal and decimal literals
    {
        std::string result;
        result += parse_digit_sequence(ptr, &is_digit);
        auto is_float = false;
        if (*ptr == '.')
        {
            // decimal fraction
            is_float = true;
            result += *ptr++;
            // fix: the fraction consists of decimal digits; using
            // is_hexadecimal_digit here swallowed the 'e' of an exponent
            // (e.g. "1.5e+3" parsed its fraction as "5e" and lost "+3")
            result += parse_digit_sequence(ptr, &is_digit);
        }
        if (auto exp = parse_floating_point_exponent(ptr))
        {
            // e/E exponent
            is_float = true;
            result += exp.value();
        }
        if (is_float)
            append_floating_point_suffix(result, ptr);
        else
            append_integer_suffix(result, ptr);
        return cpp_token(is_float ? cpp_token_kind::float_literal : cpp_token_kind::int_literal,
                         result);
    }
    else if (*ptr == '.' && is_digit(ptr[1])) // fraction-only floating point (.5)
    {
        std::string result;
        result += *ptr++;
        result += parse_digit_sequence(ptr, &is_digit);
        if (auto exp = parse_floating_point_exponent(ptr))
            result += exp.value();
        append_floating_point_suffix(result, ptr);
        return cpp_token(cpp_token_kind::float_literal, result);
    }
    else
        return type_safe::nullopt;
}
// Consumes the encoding prefix of a character or string literal, if any.
type_safe::optional<std::string> parse_encoding_prefix(const char*& ptr)
{
    // note: "u8" must be tried before "u"
    for (auto candidate : {"u8", "u", "U", "L"})
    {
        std::string prefix = candidate;
        if (bump_if(ptr, prefix))
            return prefix;
    }
    return type_safe::nullopt;
}
// Consumes a character literal (with optional encoding prefix, escape
// sequences and UDL suffix) from `ptr`.
// \returns the token, or nullopt (ptr unchanged) if no character literal
// starts at `ptr`.
type_safe::optional<cpp_token> character_literal(const char*& ptr)
{
    auto rollback = ptr;
    auto prefix   = parse_encoding_prefix(ptr);
    if (*ptr != '\'')
    {
        // undo a possibly consumed encoding prefix
        ptr = rollback;
        return type_safe::nullopt;
    }

    auto spelling = prefix.value_or("");
    spelling += *ptr++; // opening quote
    while (*ptr != '\'')
    {
        DEBUG_ASSERT(*ptr, detail::assert_handler{});
        if (*ptr == '\\')
            spelling += *ptr++; // keep the escape character and what follows
        spelling += *ptr++;
    }
    spelling += *ptr++; // closing quote
    append_udl_suffix(spelling, ptr);
    return cpp_token(cpp_token_kind::char_literal, spelling);
}
// Consumes a string literal (regular or raw, with optional encoding prefix
// and UDL suffix) from `ptr`.
// \returns the token including delimiters, or nullopt (ptr unchanged) if no
// string literal starts at `ptr`.
type_safe::optional<cpp_token> string_literal(const char*& ptr)
{
    auto save   = ptr;
    auto prefix = parse_encoding_prefix(ptr);
    if (starts_with(ptr, "R\""))
    {
        // raw string literal
        auto result = prefix.value_or("");
        result += *ptr++; // R
        result += *ptr++; // "
        // collect the d-char-sequence; the literal ends with )d-chars"
        std::string terminator;
        terminator += ")";
        while (*ptr != '(')
        {
            result += *ptr;
            terminator += *ptr++;
        }
        result += *ptr++; // (
        terminator += '"';
        while (!bump_if(ptr, terminator))
        {
            // fix: assert on the character, not the pointer — the pointer is
            // never null, so an unterminated raw string ran past the buffer
            DEBUG_ASSERT(*ptr, detail::assert_handler{});
            result += *ptr++;
        }
        result += terminator;
        append_udl_suffix(result, ptr);
        return cpp_token(cpp_token_kind::string_literal, result);
    }
    else if (starts_with(ptr, "\""))
    {
        // regular string literal
        auto result = prefix.value_or("");
        result += *ptr++; // opening "
        while (*ptr != '"')
        {
            DEBUG_ASSERT(*ptr, detail::assert_handler{});
            if (*ptr == '\\')
                result += *ptr++; // keep escape sequences intact
            result += *ptr++;
        }
        result += *ptr++; // closing "
        append_udl_suffix(result, ptr);
        return cpp_token(cpp_token_kind::string_literal, result);
    }
    else
    {
        // undo a possibly consumed encoding prefix
        ptr = save;
        return type_safe::nullopt;
    }
}
// Consumes a digraph and translates it to the primary token spelling.
// The order of the checks is significant: the "<::" special case must come
// before "<:", and "%:%:" before "%:".
// \returns nullopt if `ptr` does not start a digraph.
type_safe::optional<cpp_token> digraph_token(const char*& ptr)
{
if (bump_if(ptr, "<%"))
return cpp_token(cpp_token_kind::punctuation, "{");
else if (bump_if(ptr, "%>"))
return cpp_token(cpp_token_kind::punctuation, "}");
else if (starts_with(ptr, "<::") && ptr[3] != ':' && ptr[3] != '>')
// don't detect digraph in std::vector<::std::string>
// ("<::" not followed by ':' or '>' is treated as "<" "::" — [lex.pptoken])
return type_safe::nullopt;
else if (bump_if(ptr, "<:"))
return cpp_token(cpp_token_kind::punctuation, "[");
else if (bump_if(ptr, ":>"))
return cpp_token(cpp_token_kind::punctuation, "]");
else if (bump_if(ptr, "%:%:"))
return cpp_token(cpp_token_kind::punctuation, "##");
else if (bump_if(ptr, "%:"))
return cpp_token(cpp_token_kind::punctuation, "#");
else
return type_safe::nullopt;
}
// Consumes a punctuation token from `ptr`.
// Maximal munch: within each group, longer punctuators are listed before
// their prefixes, so the first match is always the longest one.
// \returns nullopt if `ptr` does not start with any punctuator.
type_safe::optional<cpp_token> punctuation_token(const char*& ptr)
{
static constexpr const char* punctuations[] = {
// tokens starting with #
"##",
"#",
// tokens starting with .
"...",
".*",
".",
// tokens starting with :
"::",
":",
// tokens starting with +
"+=",
"++",
"+",
// tokens starting with -
"->*",
"->",
"--",
"-=",
"-",
// tokens starting with *
"*=",
"*",
// tokens starting with /
"/=",
"/",
// tokens starting with %
"%=",
"%",
// tokens starting with ^
"^=",
"^",
// tokens starting with &
"&=",
"&&",
"&",
// tokens starting with |
"|=",
"||",
"|",
// tokens starting with <
"<<=",
"<<",
"<=",
"<",
// tokens starting with >
">>=",
">>",
">=",
">",
// tokens starting with !
"!=",
"!",
// tokens starting with =
"==",
"=",
// single tokens
"~",
";",
"?",
",",
"{",
"}",
"[",
"]",
"(",
")",
};
// linear scan; the first (i.e. longest) matching spelling wins
for (auto punct : punctuations)
if (bump_if(ptr, punct))
return cpp_token(cpp_token_kind::punctuation, punct);
return type_safe::nullopt;
}
}
// Splits `str` into C++ tokens.
// Precondition (see header): the string consists of valid tokens and is
// already preprocessed, so anything unrecognized is a hard error.
// The dispatch order matters:
// numbers before punctuation (".5" is a float, not "." "5"),
// literals before identifiers (so encoding prefixes like u8 are consumed as
// part of the literal), digraphs before plain punctuation.
cpp_token_string cpp_token_string::tokenize(std::string str)
{
cpp_token_string::builder builder;
auto ptr = str.c_str();
while (*ptr)
{
if (auto num = numeric_literal_token(ptr))
builder.add_token(num.value());
else if (auto char_lit = character_literal(ptr))
builder.add_token(char_lit.value());
else if (auto str_lit = string_literal(ptr))
builder.add_token(str_lit.value());
else if (auto digraphs = digraph_token(ptr))
builder.add_token(digraphs.value());
else if (auto punct = punctuation_token(ptr))
builder.add_token(punct.value());
else if (auto id = identifier_token(ptr))
builder.add_token(id.value());
else if (*ptr == ' ' || *ptr == '\t' || *ptr == '\n' || *ptr == '\r')
// whitespace only separates tokens, it is not preserved
++ptr;
else
// input violated the precondition
DEBUG_UNREACHABLE(detail::assert_handler{});
}
return builder.finish();
}
namespace
{
bool is_identifier(char c)

View file

@ -71,8 +71,8 @@ namespace
auto access = convert_access(cur);
auto is_virtual = clang_isVirtualBase(cur) != 0u;
detail::tokenizer tokenizer(context.tu, context.file, cur);
detail::token_stream stream(tokenizer, cur);
detail::cxtokenizer tokenizer(context.tu, context.file, cur);
detail::cxtoken_stream stream(tokenizer, cur);
// [<attribute>] [virtual] [<access>] <name>
// can't use spelling to get the name
@ -108,8 +108,8 @@ std::unique_ptr<cpp_entity> detail::parse_cpp_class(const detail::parse_context&
clang_getCursorLexicalParent(cur)))
{
// out-of-line definition
detail::tokenizer tokenizer(context.tu, context.file, cur);
detail::token_stream stream(tokenizer, cur);
detail::cxtokenizer tokenizer(context.tu, context.file, cur);
detail::cxtoken_stream stream(tokenizer, cur);
std::string name = detail::get_cursor_name(cur).c_str();
auto pos = name.find('<');

View file

@ -2,7 +2,7 @@
// This file is subject to the license terms in the LICENSE file
// found in the top-level directory of this distribution.
#include "tokenizer.hpp"
#include "cxtokenizer.hpp"
#include <cctype>
@ -11,7 +11,7 @@
using namespace cppast;
detail::token::token(const CXTranslationUnit& tu_unit, const CXToken& token)
detail::cxtoken::cxtoken(const CXTranslationUnit& tu_unit, const CXToken& token)
: value_(clang_getTokenSpelling(tu_unit, token)), kind_(clang_getTokenKind(token))
{
}
@ -237,7 +237,8 @@ namespace
}
}
detail::tokenizer::tokenizer(const CXTranslationUnit& tu, const CXFile& file, const CXCursor& cur)
detail::cxtokenizer::cxtokenizer(const CXTranslationUnit& tu, const CXFile& file,
const CXCursor& cur)
{
auto extent = get_extent(tu, file, cur, unmunch_);
@ -247,7 +248,7 @@ detail::tokenizer::tokenizer(const CXTranslationUnit& tu, const CXFile& file, co
tokens_.emplace_back(tu, tokenizer[i]);
}
void detail::skip(detail::token_stream& stream, const char* str)
void detail::skip(detail::cxtoken_stream& stream, const char* str)
{
if (*str)
{
@ -263,7 +264,7 @@ void detail::skip(detail::token_stream& stream, const char* str)
namespace
{
bool starts_with(const char*& str, const detail::token& t)
bool starts_with(const char*& str, const detail::cxtoken& t)
{
if (std::strncmp(str, t.c_str(), t.value().length()) != 0)
return false;
@ -274,7 +275,7 @@ namespace
}
}
bool detail::skip_if(detail::token_stream& stream, const char* str, bool multi_token)
bool detail::skip_if(detail::cxtoken_stream& stream, const char* str, bool multi_token)
{
if (!*str)
return true;
@ -298,7 +299,7 @@ namespace
{
// whether or not the current angle bracket can be a comparison
// note: this is a heuristic I hope works often enough
bool is_comparison(CXTokenKind last_kind, const detail::token& cur, CXTokenKind next_kind)
bool is_comparison(CXTokenKind last_kind, const detail::cxtoken& cur, CXTokenKind next_kind)
{
if (cur == "<")
return last_kind == CXToken_Literal;
@ -308,7 +309,7 @@ namespace
}
}
detail::token_iterator detail::find_closing_bracket(detail::token_stream stream)
detail::cxtoken_iterator detail::find_closing_bracket(detail::cxtoken_stream stream)
{
auto template_bracket = false;
auto open_bracket = stream.peek().c_str();
@ -359,7 +360,7 @@ detail::token_iterator detail::find_closing_bracket(detail::token_stream stream)
return stream.cur();
}
void detail::skip_brackets(detail::token_stream& stream)
void detail::skip_brackets(detail::cxtoken_stream& stream)
{
auto closing = find_closing_bracket(stream);
stream.set_cur(std::next(closing));
@ -367,7 +368,7 @@ void detail::skip_brackets(detail::token_stream& stream)
namespace
{
bool skip_attribute_impl(detail::token_stream& stream)
bool skip_attribute_impl(detail::cxtoken_stream& stream)
{
if (skip_if(stream, "[") && stream.peek() == "[")
{
@ -401,7 +402,7 @@ namespace
}
}
bool detail::skip_attribute(detail::token_stream& stream)
bool detail::skip_attribute(detail::cxtoken_stream& stream)
{
auto any = false;
while (skip_attribute_impl(stream))
@ -411,9 +412,9 @@ bool detail::skip_attribute(detail::token_stream& stream)
namespace
{
cpp_token_kind get_kind(CXTokenKind kind)
cpp_token_kind get_kind(const detail::cxtoken& token)
{
switch (kind)
switch (token.kind())
{
case CXToken_Punctuation:
return cpp_token_kind::punctuation;
@ -421,25 +422,37 @@ namespace
return cpp_token_kind::keyword;
case CXToken_Identifier:
return cpp_token_kind::identifier;
case CXToken_Literal:
return cpp_token_kind::literal;
{
auto spelling = token.value().std_str();
if (spelling.find('.') != std::string::npos)
return cpp_token_kind::float_literal;
else if (std::isdigit(spelling.front()))
return cpp_token_kind::int_literal;
else if (spelling.back() == '\'')
return cpp_token_kind::char_literal;
else
return cpp_token_kind::string_literal;
}
case CXToken_Comment:
break;
}
DEBUG_UNREACHABLE(detail::assert_handler{});
return cpp_token_kind ::literal;
return cpp_token_kind::punctuation;
}
}
cpp_token_string detail::to_string(token_stream& stream, token_iterator end)
cpp_token_string detail::to_string(cxtoken_stream& stream, cxtoken_iterator end)
{
cpp_token_string::builder builder;
while (stream.cur() != end)
{
auto& token = stream.get();
builder.add_token(cpp_token(get_kind(token.kind()), token.c_str()));
builder.add_token(cpp_token(get_kind(token), token.c_str()));
}
if (stream.unmunch())
@ -448,7 +461,7 @@ cpp_token_string detail::to_string(token_stream& stream, token_iterator end)
return builder.finish();
}
bool detail::append_scope(detail::token_stream& stream, std::string& scope)
bool detail::append_scope(detail::cxtoken_stream& stream, std::string& scope)
{
// add identifiers and "::" to current scope name,
// clear if there is any other token in between, or mismatched combination

View file

@ -2,8 +2,8 @@
// This file is subject to the license terms in the LICENSE file
// found in the top-level directory of this distribution.
#ifndef CPPAST_TOKENIZER_HPP_INCLUDED
#define CPPAST_TOKENIZER_HPP_INCLUDED
#ifndef CPPAST_CXTOKENIZER_HPP_INCLUDED
#define CPPAST_CXTOKENIZER_HPP_INCLUDED
#include <string>
#include <vector>
@ -16,10 +16,10 @@ namespace cppast
{
namespace detail
{
class token
class cxtoken
{
public:
explicit token(const CXTranslationUnit& tu_unit, const CXToken& token);
explicit cxtoken(const CXTranslationUnit& tu_unit, const CXToken& token);
const cxstring& value() const noexcept
{
@ -41,40 +41,40 @@ namespace cppast
CXTokenKind kind_;
};
inline bool operator==(const token& tok, const char* str) noexcept
inline bool operator==(const cxtoken& tok, const char* str) noexcept
{
return tok.value() == str;
}
inline bool operator==(const char* str, const token& tok) noexcept
inline bool operator==(const char* str, const cxtoken& tok) noexcept
{
return str == tok.value();
}
inline bool operator!=(const token& tok, const char* str) noexcept
inline bool operator!=(const cxtoken& tok, const char* str) noexcept
{
return !(tok == str);
}
inline bool operator!=(const char* str, const token& tok) noexcept
inline bool operator!=(const char* str, const cxtoken& tok) noexcept
{
return !(str == tok);
}
using token_iterator = std::vector<token>::const_iterator;
using cxtoken_iterator = std::vector<cxtoken>::const_iterator;
class tokenizer
class cxtokenizer
{
public:
explicit tokenizer(const CXTranslationUnit& tu, const CXFile& file,
const CXCursor& cur);
explicit cxtokenizer(const CXTranslationUnit& tu, const CXFile& file,
const CXCursor& cur);
token_iterator begin() const noexcept
cxtoken_iterator begin() const noexcept
{
return tokens_.begin();
}
token_iterator end() const noexcept
cxtoken_iterator end() const noexcept
{
return tokens_.end();
}
@ -88,14 +88,14 @@ namespace cppast
}
private:
std::vector<token> tokens_;
bool unmunch_;
std::vector<cxtoken> tokens_;
bool unmunch_;
};
class token_stream
class cxtoken_stream
{
public:
explicit token_stream(const tokenizer& tokenizer, const CXCursor& cur)
explicit cxtoken_stream(const cxtokenizer& tokenizer, const CXCursor& cur)
: cursor_(cur),
begin_(tokenizer.begin()),
cur_(begin_),
@ -104,7 +104,7 @@ namespace cppast
{
}
const token& peek() const noexcept
const cxtoken& peek() const noexcept
{
if (done())
return *std::prev(end_);
@ -123,7 +123,7 @@ namespace cppast
--cur_;
}
const token& get() noexcept
const cxtoken& get() noexcept
{
auto& result = peek();
bump();
@ -140,22 +140,22 @@ namespace cppast
return cursor_;
}
token_iterator begin() const noexcept
cxtoken_iterator begin() const noexcept
{
return begin_;
}
token_iterator cur() const noexcept
cxtoken_iterator cur() const noexcept
{
return cur_;
}
token_iterator end() const noexcept
cxtoken_iterator end() const noexcept
{
return end_;
}
void set_cur(token_iterator iter) noexcept
void set_cur(cxtoken_iterator iter) noexcept
{
cur_ = iter;
}
@ -166,41 +166,41 @@ namespace cppast
}
private:
CXCursor cursor_;
token_iterator begin_, cur_, end_;
bool unmunch_;
CXCursor cursor_;
cxtoken_iterator begin_, cur_, end_;
bool unmunch_;
};
// skips the next token
// asserts that it has the given string
void skip(token_stream& stream, const char* str);
void skip(cxtoken_stream& stream, const char* str);
// skips the next token if it has the given string
// if multi_token == true, str can consist of multiple tokens optionally separated by whitespace
bool skip_if(token_stream& stream, const char* str, bool multi_token = false);
bool skip_if(cxtoken_stream& stream, const char* str, bool multi_token = false);
// returns the location of the closing bracket
// the current token must be (,[,{ or <
// note: < might not work in the arguments of a template specialization
token_iterator find_closing_bracket(token_stream stream);
cxtoken_iterator find_closing_bracket(cxtoken_stream stream);
// skips brackets
// the current token must be (,[,{ or <
// note: < might not work in the arguments of a template specialization
void skip_brackets(token_stream& stream);
void skip_brackets(cxtoken_stream& stream);
// skips an attribute
bool skip_attribute(token_stream& stream);
bool skip_attribute(cxtoken_stream& stream);
// converts a token range to a string
cpp_token_string to_string(token_stream& stream, token_iterator end);
cpp_token_string to_string(cxtoken_stream& stream, cxtoken_iterator end);
// appends token to scope, if it is still valid
// else clears it
// note: does not consume the token if it is not valid,
// returns false in that case
bool append_scope(token_stream& stream, std::string& scope);
bool append_scope(cxtoken_stream& stream, std::string& scope);
}
} // namespace cppast::detail
#endif // CPPAST_TOKENIZER_HPP_INCLUDED
#endif // CPPAST_CXTOKENIZER_HPP_INCLUDED

View file

@ -7,7 +7,7 @@
#include <cstdio>
#include <mutex>
#include "tokenizer.hpp"
#include "cxtokenizer.hpp"
using namespace cppast;
@ -50,7 +50,7 @@ void detail::print_tokens(const CXTranslationUnit& tu, const CXFile& file,
const CXCursor& cur) noexcept
{
std::lock_guard<std::mutex> lock(mtx);
detail::tokenizer tokenizer(tu, file, cur);
detail::cxtokenizer tokenizer(tu, file, cur);
for (auto& token : tokenizer)
std::fprintf(stderr, "%s ", token.c_str());
std::fputs("\n", stderr);

View file

@ -20,8 +20,8 @@ namespace
DEBUG_ASSERT(cur.kind == CXCursor_EnumConstantDecl, detail::parse_error_handler{}, cur,
"unexpected child cursor of enum");
detail::tokenizer tokenizer(context.tu, context.file, cur);
detail::token_stream stream(tokenizer, cur);
detail::cxtokenizer tokenizer(context.tu, context.file, cur);
detail::cxtoken_stream stream(tokenizer, cur);
// <identifier> [<attribute>],
// or: <identifier> [<attribute>] = <expression>,
@ -47,9 +47,9 @@ namespace
cpp_enum::builder make_enum_builder(const detail::parse_context& context, const CXCursor& cur,
type_safe::optional<cpp_entity_ref>& semantic_parent)
{
auto name = detail::get_cursor_name(cur);
detail::tokenizer tokenizer(context.tu, context.file, cur);
detail::token_stream stream(tokenizer, cur);
auto name = detail::get_cursor_name(cur);
detail::cxtokenizer tokenizer(context.tu, context.file, cur);
detail::cxtoken_stream stream(tokenizer, cur);
// [<attribute>] enum [class] [<attribute>] name [: type] {
detail::skip_attribute(stream);

View file

@ -14,8 +14,8 @@ std::unique_ptr<cpp_expression> detail::parse_expression(const detail::parse_con
auto kind = clang_getCursorKind(cur);
DEBUG_ASSERT(clang_isExpression(kind), detail::assert_handler{});
detail::tokenizer tokenizer(context.tu, context.file, cur);
detail::token_stream stream(tokenizer, cur);
detail::cxtokenizer tokenizer(context.tu, context.file, cur);
detail::cxtoken_stream stream(tokenizer, cur);
auto type = parse_type(context, cur, clang_getCursorType(cur));
auto expr = to_string(stream, stream.end());
@ -36,8 +36,8 @@ std::unique_ptr<cpp_expression> detail::parse_expression(const detail::parse_con
}
std::unique_ptr<cpp_expression> detail::parse_raw_expression(const parse_context&,
token_stream& stream,
token_iterator end,
cxtoken_stream& stream,
cxtoken_iterator end,
std::unique_ptr<cpp_type> type)
{
if (stream.done())

View file

@ -92,7 +92,7 @@ namespace
}
// precondition: after the name
void skip_parameters(detail::token_stream& stream)
void skip_parameters(detail::cxtoken_stream& stream)
{
if (stream.peek() == "<")
// specialization arguments
@ -218,7 +218,7 @@ namespace
bool is_friend = false;
};
bool prefix_end(detail::token_stream& stream, const char* name, bool is_ctor)
bool prefix_end(detail::cxtoken_stream& stream, const char* name, bool is_ctor)
{
auto cur = stream.cur();
// name can have multiple tokens if it is an operator
@ -262,7 +262,7 @@ namespace
return true;
}
prefix_info parse_prefix_info(detail::token_stream& stream, const char* name, bool is_ctor)
prefix_info parse_prefix_info(detail::cxtoken_stream& stream, const char* name, bool is_ctor)
{
prefix_info result;
@ -302,7 +302,7 @@ namespace
}
};
cpp_cv parse_cv(detail::token_stream& stream)
cpp_cv parse_cv(detail::cxtoken_stream& stream)
{
if (detail::skip_if(stream, "const"))
{
@ -322,7 +322,7 @@ namespace
return cpp_cv_none;
}
cpp_reference parse_ref(detail::token_stream& stream)
cpp_reference parse_ref(detail::cxtoken_stream& stream)
{
if (detail::skip_if(stream, "&"))
return cpp_ref_lvalue;
@ -332,7 +332,7 @@ namespace
return cpp_ref_none;
}
std::unique_ptr<cpp_expression> parse_noexcept(detail::token_stream& stream,
std::unique_ptr<cpp_expression> parse_noexcept(detail::cxtoken_stream& stream,
const detail::parse_context& context)
{
if (!detail::skip_if(stream, "noexcept"))
@ -351,7 +351,7 @@ namespace
return expr;
}
cpp_function_body_kind parse_body_kind(detail::token_stream& stream, bool& pure_virtual)
cpp_function_body_kind parse_body_kind(detail::cxtoken_stream& stream, bool& pure_virtual)
{
pure_virtual = false;
if (detail::skip_if(stream, "default"))
@ -369,7 +369,7 @@ namespace
return cpp_function_declaration;
}
void parse_body(detail::token_stream& stream, suffix_info& result, bool allow_virtual)
void parse_body(detail::cxtoken_stream& stream, suffix_info& result, bool allow_virtual)
{
auto pure_virtual = false;
result.body_kind = parse_body_kind(stream, pure_virtual);
@ -385,7 +385,7 @@ namespace
}
// precondition: we've skipped the function parameters
suffix_info parse_suffix_info(detail::token_stream& stream,
suffix_info parse_suffix_info(detail::cxtoken_stream& stream,
const detail::parse_context& context, bool allow_qualifier,
bool allow_virtual)
{
@ -488,8 +488,8 @@ namespace
{
auto name = detail::get_cursor_name(cur);
detail::tokenizer tokenizer(context.tu, context.file, cur);
detail::token_stream stream(tokenizer, cur);
detail::cxtokenizer tokenizer(context.tu, context.file, cur);
detail::cxtoken_stream stream(tokenizer, cur);
auto prefix = parse_prefix_info(stream, name.c_str(), false);
DEBUG_ASSERT(!prefix.is_virtual && !prefix.is_explicit, detail::parse_error_handler{}, cur,
@ -611,7 +611,7 @@ namespace
template <class Builder>
std::unique_ptr<cpp_entity> handle_suffix(const detail::parse_context& context,
const CXCursor& cur, Builder& builder,
detail::token_stream& stream, bool is_virtual,
detail::cxtoken_stream& stream, bool is_virtual,
type_safe::optional<cpp_entity_ref> semantic_parent)
{
auto allow_qualifiers = set_qualifier(0, builder, cpp_cv_none, cpp_ref_none);
@ -640,8 +640,8 @@ std::unique_ptr<cpp_entity> detail::parse_cpp_member_function(const detail::pars
detail::assert_handler{});
auto name = detail::get_cursor_name(cur);
detail::tokenizer tokenizer(context.tu, context.file, cur);
detail::token_stream stream(tokenizer, cur);
detail::cxtokenizer tokenizer(context.tu, context.file, cur);
detail::cxtoken_stream stream(tokenizer, cur);
auto prefix = parse_prefix_info(stream, name.c_str(), false);
DEBUG_ASSERT(!prefix.is_explicit, detail::parse_error_handler{}, cur,
@ -670,8 +670,8 @@ std::unique_ptr<cpp_entity> detail::parse_cpp_conversion_op(const detail::parse_
|| clang_getTemplateCursorKind(cur) == CXCursor_ConversionFunction,
detail::assert_handler{});
detail::tokenizer tokenizer(context.tu, context.file, cur);
detail::token_stream stream(tokenizer, cur);
detail::cxtokenizer tokenizer(context.tu, context.file, cur);
detail::cxtoken_stream stream(tokenizer, cur);
auto prefix = parse_prefix_info(stream, "operator", false);
// heuristic to find arguments tokens
@ -735,8 +735,8 @@ std::unique_ptr<cpp_entity> detail::parse_cpp_constructor(const detail::parse_co
if (pos != std::string::npos)
name.erase(pos);
detail::tokenizer tokenizer(context.tu, context.file, cur);
detail::token_stream stream(tokenizer, cur);
detail::cxtokenizer tokenizer(context.tu, context.file, cur);
detail::cxtoken_stream stream(tokenizer, cur);
auto prefix = parse_prefix_info(stream, name.c_str(), true);
DEBUG_ASSERT(!prefix.is_virtual, detail::parse_error_handler{}, cur,
@ -771,8 +771,8 @@ std::unique_ptr<cpp_entity> detail::parse_cpp_destructor(const detail::parse_con
{
DEBUG_ASSERT(clang_getCursorKind(cur) == CXCursor_Destructor, detail::assert_handler{});
detail::tokenizer tokenizer(context.tu, context.file, cur);
detail::token_stream stream(tokenizer, cur);
detail::cxtokenizer tokenizer(context.tu, context.file, cur);
detail::cxtoken_stream stream(tokenizer, cur);
auto prefix_info = parse_prefix_info(stream, "~", false);
DEBUG_ASSERT(!prefix_info.is_constexpr && !prefix_info.is_explicit, detail::assert_handler{});

View file

@ -17,8 +17,8 @@ std::unique_ptr<cpp_entity> detail::try_parse_cpp_language_linkage(const parse_c
DEBUG_ASSERT(cur.kind == CXCursor_UnexposedDecl,
detail::assert_handler{}); // not exposed currently
detail::tokenizer tokenizer(context.tu, context.file, cur);
detail::token_stream stream(tokenizer, cur);
detail::cxtokenizer tokenizer(context.tu, context.file, cur);
detail::cxtoken_stream stream(tokenizer, cur);
// extern <name> ...
if (!detail::skip_if(stream, "extern"))

View file

@ -15,7 +15,7 @@
#include "parse_error.hpp"
#include "parse_functions.hpp"
#include "preprocessor.hpp"
#include "tokenizer.hpp"
#include "cxtokenizer.hpp"
using namespace cppast;

View file

@ -16,8 +16,8 @@ namespace
cpp_namespace::builder make_ns_builder(const detail::parse_context& context,
const CXCursor& cur)
{
detail::tokenizer tokenizer(context.tu, context.file, cur);
detail::token_stream stream(tokenizer, cur);
detail::cxtokenizer tokenizer(context.tu, context.file, cur);
detail::cxtoken_stream stream(tokenizer, cur);
// [inline] namespace [<attribute>] <identifier> {
auto is_inline = false;
@ -83,8 +83,8 @@ std::unique_ptr<cpp_entity> detail::parse_cpp_namespace_alias(const detail::pars
{
DEBUG_ASSERT(cur.kind == CXCursor_NamespaceAlias, detail::assert_handler{});
detail::tokenizer tokenizer(context.tu, context.file, cur);
detail::token_stream stream(tokenizer, cur);
detail::cxtokenizer tokenizer(context.tu, context.file, cur);
detail::cxtoken_stream stream(tokenizer, cur);
// namespace <identifier> = <nested identifier>;
detail::skip(stream, "namespace");
@ -108,8 +108,8 @@ std::unique_ptr<cpp_entity> detail::parse_cpp_using_directive(const detail::pars
{
DEBUG_ASSERT(cur.kind == CXCursor_UsingDirective, detail::assert_handler{});
detail::tokenizer tokenizer(context.tu, context.file, cur);
detail::token_stream stream(tokenizer, cur);
detail::cxtokenizer tokenizer(context.tu, context.file, cur);
detail::cxtoken_stream stream(tokenizer, cur);
// using namespace <nested identifier>;
detail::skip(stream, "using");
@ -182,8 +182,8 @@ std::unique_ptr<cpp_entity> detail::parse_cpp_using_declaration(
{
DEBUG_ASSERT(cur.kind == CXCursor_UsingDeclaration, detail::assert_handler{});
detail::tokenizer tokenizer(context.tu, context.file, cur);
detail::token_stream stream(tokenizer, cur);
detail::cxtokenizer tokenizer(context.tu, context.file, cur);
detail::cxtoken_stream stream(tokenizer, cur);
// using <nested identifier>;
detail::skip(stream, "using");

View file

@ -215,10 +215,10 @@ std::unique_ptr<cpp_entity> detail::parse_entity(const detail::parse_context& co
detail::get_cursor_kind_spelling(cur).c_str(), "'"));
// build unexposed entity
auto name = detail::get_cursor_name(cur);
detail::tokenizer tokenizer(context.tu, context.file, cur);
detail::token_stream stream(tokenizer, cur);
auto spelling = detail::to_string(stream, stream.end());
auto name = detail::get_cursor_name(cur);
detail::cxtokenizer tokenizer(context.tu, context.file, cur);
detail::cxtoken_stream stream(tokenizer, cur);
auto spelling = detail::to_string(stream, stream.end());
std::unique_ptr<cppast::cpp_entity> entity;
if (name.empty())

View file

@ -9,7 +9,7 @@
#include <cppast/parser.hpp>
#include "raii_wrapper.hpp"
#include "tokenizer.hpp" // for convenience
#include "cxtokenizer.hpp" // for convenience
#include "parse_error.hpp" // for convenience
#include "preprocessor.hpp"
@ -76,8 +76,8 @@ namespace cppast
// and ends at the given iterator
// this is required for situations where there is no type exposed,
// like default type of a template type parameter
std::unique_ptr<cpp_type> parse_raw_type(const parse_context& context, token_stream& stream,
token_iterator end);
std::unique_ptr<cpp_type> parse_raw_type(const parse_context& context,
cxtoken_stream& stream, cxtoken_iterator end);
std::unique_ptr<cpp_expression> parse_expression(const parse_context& context,
const CXCursor& cur);
@ -86,8 +86,8 @@ namespace cppast
// this is required for situations where there is no expression cursor exposed,
// like member initializers
std::unique_ptr<cpp_expression> parse_raw_expression(const parse_context& context,
token_stream& stream,
token_iterator end,
cxtoken_stream& stream,
cxtoken_iterator end,
std::unique_ptr<cpp_type> type);
// parse_entity() dispatches on the cursor type

View file

@ -50,9 +50,9 @@ namespace
DEBUG_ASSERT(clang_getCursorKind(cur) == CXCursor_TemplateTypeParameter,
detail::assert_handler{});
detail::tokenizer tokenizer(context.tu, context.file, cur);
detail::token_stream stream(tokenizer, cur);
auto name = detail::get_cursor_name(cur);
detail::cxtokenizer tokenizer(context.tu, context.file, cur);
detail::cxtoken_stream stream(tokenizer, cur);
auto name = detail::get_cursor_name(cur);
// syntax: typename/class [...] name [= ...]
auto keyword = cpp_template_keyword::keyword_class;
@ -87,8 +87,8 @@ namespace
auto type = clang_getCursorType(cur);
auto def = detail::parse_default_value(context, cur, name.c_str());
detail::tokenizer tokenizer(context.tu, context.file, cur);
detail::token_stream stream(tokenizer, cur);
detail::cxtokenizer tokenizer(context.tu, context.file, cur);
detail::cxtoken_stream stream(tokenizer, cur);
// see if it is variadic
// syntax a): some-tokens ... name some-tokens
@ -120,9 +120,9 @@ namespace
DEBUG_ASSERT(clang_getCursorKind(cur) == CXCursor_TemplateTemplateParameter,
detail::assert_handler{});
detail::tokenizer tokenizer(context.tu, context.file, cur);
detail::token_stream stream(tokenizer, cur);
auto name = detail::get_cursor_name(cur);
detail::cxtokenizer tokenizer(context.tu, context.file, cur);
detail::cxtoken_stream stream(tokenizer, cur);
auto name = detail::get_cursor_name(cur);
// syntax: template <…> class/typename [...] name [= …]
detail::skip(stream, "template");
@ -263,8 +263,8 @@ namespace
template <class Builder>
void parse_arguments(Builder& b, const detail::parse_context& context, const CXCursor& cur)
{
detail::tokenizer tokenizer(context.tu, context.file, cur);
detail::token_stream stream(tokenizer, cur);
detail::cxtokenizer tokenizer(context.tu, context.file, cur);
detail::cxtoken_stream stream(tokenizer, cur);
while (!stream.done()
&& !detail::skip_if(stream, detail::get_cursor_name(cur).c_str(), true))

View file

@ -246,7 +246,7 @@ namespace
return size_expr.empty() ?
nullptr :
cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_ulonglong),
cpp_token_string::from_string(
cpp_token_string::tokenize(
std::string(size_expr.rbegin(),
size_expr.rend())));
}
@ -488,7 +488,7 @@ namespace
return cpp_decltype_type::build(
cpp_unexposed_expression::build(cpp_unexposed_type::build("<decltype>"),
cpp_token_string::from_string(spelling)));
cpp_token_string::tokenize(spelling)));
});
}
@ -731,8 +731,8 @@ std::unique_ptr<cpp_type> detail::parse_type(const detail::parse_context& contex
}
std::unique_ptr<cpp_type> detail::parse_raw_type(const detail::parse_context&,
detail::token_stream& stream,
detail::token_iterator end)
detail::cxtoken_stream& stream,
detail::cxtoken_iterator end)
{
auto result = detail::to_string(stream, end);
return cpp_unexposed_type::build(result.as_string());

View file

@ -14,8 +14,8 @@ using namespace cppast;
std::unique_ptr<cpp_expression> detail::parse_default_value(const detail::parse_context& context,
const CXCursor& cur, const char* name)
{
detail::tokenizer tokenizer(context.tu, context.file, cur);
detail::token_stream stream(tokenizer, cur);
detail::cxtokenizer tokenizer(context.tu, context.file, cur);
detail::cxtoken_stream stream(tokenizer, cur);
auto has_default = false;
auto got_name = *name == '\0';
@ -57,7 +57,7 @@ std::unique_ptr<cpp_entity> detail::parse_cpp_variable(const detail::parse_conte
// just look for thread local or constexpr
// can't appear anywhere else, so good enough
detail::tokenizer tokenizer(context.tu, context.file, cur);
detail::cxtokenizer tokenizer(context.tu, context.file, cur);
for (auto& token : tokenizer)
if (token.value() == "thread_local")
storage_class =

View file

@ -24,6 +24,7 @@ set(tests
cpp_preprocessor.cpp
cpp_static_assert.cpp
cpp_template_parameter.cpp
cpp_token.cpp
cpp_type_alias.cpp
cpp_variable.cpp
integration.cpp

View file

@ -104,7 +104,7 @@ void ns::l()
*cpp_unexposed_expression::
build(cpp_pointer_type::build(
cpp_builtin_type::build(cpp_float)),
cpp_token_string::from_string("nullptr"))));
cpp_token_string::tokenize("nullptr"))));
}
else
REQUIRE(false);
@ -135,7 +135,7 @@ void ns::l()
*cpp_decltype_type::build(
cpp_unexposed_expression::
build(cpp_builtin_type::build(cpp_int),
cpp_token_string::from_string("42")))));
cpp_token_string::tokenize("42")))));
REQUIRE(!param.default_value());
}
else
@ -162,16 +162,17 @@ void ns::l()
equal_expressions(func.noexcept_condition().value(),
*cpp_literal_expression::build(std::move(bool_t), "true")));
else if (func.name() == "e")
REQUIRE(equal_expressions(func.noexcept_condition().value(),
*cpp_unexposed_expression::
build(std::move(bool_t),
cpp_token_string::from_string("false"))));
REQUIRE(
equal_expressions(func.noexcept_condition().value(),
*cpp_unexposed_expression::build(std::move(bool_t),
cpp_token_string::tokenize(
"false"))));
else if (func.name() == "f")
REQUIRE(
equal_expressions(func.noexcept_condition().value(),
*cpp_unexposed_expression::
build(std::move(bool_t),
cpp_token_string::from_string("noexcept(d())"))));
*cpp_unexposed_expression::build(std::move(bool_t),
cpp_token_string::tokenize(
"noexcept(d())"))));
}
else if (func.name() == "g" || func.name() == "h" || func.name() == "i"
|| func.name() == "j")

View file

@ -402,12 +402,11 @@ d::~d() {}
REQUIRE(!dtor.is_virtual());
REQUIRE(dtor.body_kind() == cpp_function_definition);
REQUIRE(dtor.noexcept_condition());
REQUIRE(
equal_expressions(dtor.noexcept_condition().value(),
*cpp_unexposed_expression::build(cpp_builtin_type::build(
cpp_bool),
cpp_token_string::from_string(
"false"))));
REQUIRE(equal_expressions(dtor.noexcept_condition().value(),
*cpp_unexposed_expression::build(cpp_builtin_type::build(
cpp_bool),
cpp_token_string::tokenize(
"false"))));
}
else if (dtor.name() == "~c")
{

View file

@ -39,7 +39,7 @@ struct foo
// all initializers are unexposed
auto def = cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_float),
cpp_token_string::from_string("3.14f"));
cpp_token_string::tokenize("3.14f"));
REQUIRE(var.default_value());
REQUIRE(equal_expressions(var.default_value().value(), *def));

View file

@ -34,17 +34,15 @@ struct foo
REQUIRE(equal_expressions(assert.expression(),
*cpp_literal_expression::build(std::move(bool_t), "true")));
else if (assert.message() == "a")
REQUIRE(
equal_expressions(assert.expression(),
*cpp_unexposed_expression::build(std::move(bool_t),
cpp_token_string::from_string(
"true||false"))));
REQUIRE(equal_expressions(assert.expression(),
*cpp_unexposed_expression::build(std::move(bool_t),
cpp_token_string::tokenize(
"true||false"))));
else if (assert.message() == "b")
REQUIRE(
equal_expressions(assert.expression(),
*cpp_unexposed_expression::build(std::move(bool_t),
cpp_token_string::from_string(
"!B"))));
REQUIRE(equal_expressions(assert.expression(),
*cpp_unexposed_expression::build(std::move(bool_t),
cpp_token_string::tokenize(
"!B"))));
else
REQUIRE(false);
});

View file

@ -151,13 +151,13 @@ using d = void;
cpp_builtin_type::build(cpp_char))));
REQUIRE(!param.is_variadic());
REQUIRE(param.default_value());
REQUIRE(equal_expressions(param.default_value().value(),
*cpp_unexposed_expression::
build(cpp_builtin_type::build(
cpp_nullptr),
cpp_token_string::
from_string(
"nullptr"))));
REQUIRE(
equal_expressions(param.default_value().value(),
*cpp_unexposed_expression::
build(cpp_builtin_type::build(
cpp_nullptr),
cpp_token_string::tokenize(
"nullptr"))));
}
else if (param.name() == "C")
{

129
test/cpp_token.cpp Normal file
View file

@ -0,0 +1,129 @@
// Copyright (C) 2017 Jonathan Müller <jonathanmueller.dev@gmail.com>
// This file is subject to the license terms in the LICENSE file
// found in the top-level directory of this distribution.
#include <cppast/cpp_token.hpp>
#include <catch.hpp>
#include <algorithm>
#include <initializer_list>
using namespace cppast;
void check_equal_tokens(const std::string& str, std::initializer_list<cpp_token> tokens)
{
auto token_str = cpp_token_string::tokenize(str);
INFO(str);
REQUIRE(token_str.end() - token_str.begin() == tokens.size());
REQUIRE(std::equal(token_str.begin(), token_str.end(), tokens.begin()));
}
TEST_CASE("tokenizer")
{
SECTION("integer literals")
{
check_equal_tokens(" 1234 ", {cpp_token(cpp_token_kind::int_literal, "1234")});
check_equal_tokens("1, 2", {cpp_token(cpp_token_kind::int_literal, "1"),
cpp_token(cpp_token_kind::punctuation, ","),
cpp_token(cpp_token_kind::int_literal, "2")});
// integer suffixes
check_equal_tokens("1234ul", {cpp_token(cpp_token_kind::int_literal, "1234ul")});
check_equal_tokens("12'34LU", {cpp_token(cpp_token_kind::int_literal, "1234LU")});
// other integer formats
check_equal_tokens("01234", {cpp_token(cpp_token_kind::int_literal, "01234")});
check_equal_tokens("0x1234AF", {cpp_token(cpp_token_kind::int_literal, "0x1234AF")});
check_equal_tokens("0b101101", {cpp_token(cpp_token_kind::int_literal, "0b101101")});
}
SECTION("floating point literals")
{
// floating point suffixes
check_equal_tokens("3.14", {cpp_token(cpp_token_kind::float_literal, "3.14")});
check_equal_tokens("3.14f", {cpp_token(cpp_token_kind::float_literal, "3.14f")});
check_equal_tokens("3.14L", {cpp_token(cpp_token_kind::float_literal, "3.14L")});
// missing parts
check_equal_tokens(".5", {cpp_token(cpp_token_kind::float_literal, ".5")});
check_equal_tokens("1.", {cpp_token(cpp_token_kind::float_literal, "1.")});
// exponents
check_equal_tokens("1.0e4", {cpp_token(cpp_token_kind::float_literal, "1.0e4")});
check_equal_tokens("1e4", {cpp_token(cpp_token_kind::float_literal, "1e4")});
check_equal_tokens(".5e-2", {cpp_token(cpp_token_kind::float_literal, ".5e-2")});
// hexadecimal
check_equal_tokens("0xabc.def", {cpp_token(cpp_token_kind::float_literal, "0xabc.def")});
check_equal_tokens("0x123p42", {cpp_token(cpp_token_kind::float_literal, "0x123p42")});
}
SECTION("character literals")
{
check_equal_tokens(R"('a')", {cpp_token(cpp_token_kind::char_literal, R"('a')")});
check_equal_tokens(R"(u8'a')", {cpp_token(cpp_token_kind::char_literal, R"(u8'a')")});
check_equal_tokens(R"(U'a')", {cpp_token(cpp_token_kind::char_literal, R"(U'a')")});
check_equal_tokens(R"('\'')", {cpp_token(cpp_token_kind::char_literal, R"('\'')")});
}
SECTION("string literals")
{
check_equal_tokens(R"("hello")", {cpp_token(cpp_token_kind::string_literal, R"("hello")")});
check_equal_tokens(R"(u8"he\"llo")",
{cpp_token(cpp_token_kind::string_literal, R"(u8"he\"llo")")});
check_equal_tokens(R"*(R"(hel\"lo)")*",
{cpp_token(cpp_token_kind::string_literal, R"*(R"(hel\"lo)")*")});
check_equal_tokens(R"**(R"*(hello R"(foo)")*")**",
{cpp_token(cpp_token_kind::string_literal,
R"**(R"*(hello R"(foo)")*")**")});
}
SECTION("UDLs")
{
check_equal_tokens("123_foo", {cpp_token(cpp_token_kind::int_literal, "123_foo")});
check_equal_tokens("123.456_foo",
{cpp_token(cpp_token_kind::float_literal, "123.456_foo")});
check_equal_tokens(R"("hi"_foo)",
{cpp_token(cpp_token_kind::string_literal, R"("hi"_foo)")});
}
SECTION("identifiers")
{
check_equal_tokens("foo bar baz_a", {cpp_token(cpp_token_kind::identifier, "foo"),
cpp_token(cpp_token_kind::identifier, "bar"),
cpp_token(cpp_token_kind::identifier, "baz_a")});
check_equal_tokens("constant", {cpp_token(cpp_token_kind::identifier, "constant")});
}
SECTION("keywords")
{
// just test some
check_equal_tokens("const float auto", {cpp_token(cpp_token_kind::keyword, "const"),
cpp_token(cpp_token_kind::keyword, "float"),
cpp_token(cpp_token_kind::keyword, "auto")});
}
SECTION("punctuations")
{
// just test munch things
check_equal_tokens("<< <= <", {cpp_token(cpp_token_kind::punctuation, "<<"),
cpp_token(cpp_token_kind::punctuation, "<="),
cpp_token(cpp_token_kind::punctuation, "<")});
check_equal_tokens("- -- -> ->*", {cpp_token(cpp_token_kind::punctuation, "-"),
cpp_token(cpp_token_kind::punctuation, "--"),
cpp_token(cpp_token_kind::punctuation, "->"),
cpp_token(cpp_token_kind::punctuation, "->*")});
check_equal_tokens("--->>>>", {cpp_token(cpp_token_kind::punctuation, "--"),
cpp_token(cpp_token_kind::punctuation, "->"),
cpp_token(cpp_token_kind::punctuation, ">>"),
cpp_token(cpp_token_kind::punctuation, ">")});
// alternative spellings
check_equal_tokens("and not xor", {cpp_token(cpp_token_kind::punctuation, "&&"),
cpp_token(cpp_token_kind::punctuation, "!"),
cpp_token(cpp_token_kind::punctuation, "^")});
// digraphs
check_equal_tokens("<% foo<::bar>", {cpp_token(cpp_token_kind::punctuation, "{"),
cpp_token(cpp_token_kind::identifier, "foo"),
cpp_token(cpp_token_kind::punctuation, "<"),
cpp_token(cpp_token_kind::punctuation, "::"),
cpp_token(cpp_token_kind::identifier, "bar"),
cpp_token(cpp_token_kind::punctuation, ">")});
}
}

View file

@ -334,7 +334,7 @@ typedef decltype(0) w;
return cpp_literal_expression::build(std::move(type), std::move(size));
else
return cpp_unexposed_expression::build(std::move(type),
cpp_token_string::from_string(std::move(size)));
cpp_token_string::tokenize(std::move(size)));
};
cpp_entity_index idx;
@ -507,7 +507,7 @@ typedef decltype(0) w;
{
auto type = cpp_decltype_type::build(
cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_int),
cpp_token_string::from_string("0")));
cpp_token_string::tokenize("0")));
REQUIRE(equal_types(idx, alias.underlying_type(), *type));
}
else

View file

@ -101,14 +101,13 @@ int r[] = {0};
// unexposed due to implicit cast, I think
type_safe::ref(
*cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_int),
cpp_token_string::from_string(
"42"))),
cpp_token_string::tokenize("42"))),
cpp_storage_class_none, false, false);
else if (var.name() == "c")
check_variable(var, *cpp_builtin_type::build(cpp_float),
type_safe::ref(
*cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_float),
cpp_token_string::from_string(
cpp_token_string::tokenize(
"3.f+0.14f"))),
cpp_storage_class_none, false, false);
else if (var.name() == "d")
@ -126,8 +125,7 @@ int r[] = {0};
cpp_cv_const),
type_safe::ref(
*cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_int),
cpp_token_string::from_string(
"12"))),
cpp_token_string::tokenize("12"))),
cpp_storage_class_none, true, false);
else if (var.name() == "i")
{
@ -147,7 +145,7 @@ int r[] = {0};
*cpp_unexposed_expression::build(cpp_user_defined_type::build(
cpp_type_ref(cpp_entity_id(""),
"bar")),
cpp_token_string::from_string(
cpp_token_string::tokenize(
"bar()"))),
cpp_storage_class_none, false, false);
return false;
@ -169,8 +167,7 @@ int r[] = {0};
check_variable(var, *cpp_auto_type::build(),
type_safe::ref(
*cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_int),
cpp_token_string::from_string(
"128"))),
cpp_token_string::tokenize("128"))),
cpp_storage_class_none, false, false);
else if (var.name() == "n")
check_variable(var,
@ -180,14 +177,13 @@ int r[] = {0};
cpp_ref_lvalue),
type_safe::ref(
*cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_int),
cpp_token_string::from_string(
"m"))),
cpp_token_string::tokenize("m"))),
cpp_storage_class_none, false, false);
else if (var.name() == "o")
check_variable(var,
*cpp_decltype_type::build(
cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_int),
cpp_token_string::from_string("0"))),
cpp_token_string::tokenize("0"))),
nullptr, cpp_storage_class_none, false, false);
else if (var.name() == "p")
check_variable(var,
@ -196,13 +192,12 @@ int r[] = {0};
build(cpp_decltype_type::build(
cpp_unexposed_expression::
build(cpp_builtin_type::build(cpp_int),
cpp_token_string::from_string("o"))),
cpp_token_string::tokenize("o"))),
cpp_cv_const),
cpp_ref_lvalue),
type_safe::ref(
*cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_int),
cpp_token_string::from_string(
"o"))),
cpp_token_string::tokenize("o"))),
cpp_storage_class_none, false, false);
else if (var.name() == "q")
check_variable(var,
@ -219,8 +214,7 @@ int r[] = {0};
"1")),
type_safe::ref(
*cpp_unexposed_expression::build(cpp_unexposed_type::build(""),
cpp_token_string::from_string(
"{0}"))),
cpp_token_string::tokenize("{0}"))),
cpp_storage_class_none, false, false);
else
REQUIRE(false);