Add function to tokenize strings
This commit is contained in:
parent
cfac41c7b5
commit
1572920650
14 changed files with 798 additions and 83 deletions
|
|
@ -15,12 +15,13 @@ namespace cppast
|
|||
/// The kinds of C++ tokens.
|
||||
enum class cpp_token_kind
|
||||
{
|
||||
identifier, //< Any identifier.
|
||||
keyword, //< Any keyword.
|
||||
literal, //< Any literal.
|
||||
punctuation, //< Any other punctuation.
|
||||
|
||||
unknown, //< An unknown token.
|
||||
identifier, //< Any identifier.
|
||||
keyword, //< Any keyword.
|
||||
int_literal, //< An integer literal.
|
||||
float_literal, //< A floating point literal.
|
||||
char_literal, //< A character literal.
|
||||
string_literal, //< A string literal.
|
||||
punctuation //< Any other punctuation.
|
||||
};
|
||||
|
||||
/// A C++ token.
|
||||
|
|
@ -74,16 +75,15 @@ namespace cppast
|
|||
std::vector<cpp_token> tokens_;
|
||||
};
|
||||
|
||||
/// Tokenizes a string.
|
||||
/// \effects Splits the string into C++ tokens.
|
||||
/// The string must contain valid tokens and must already be preprocessed (i.e. translation phase 6 is already done).
|
||||
/// \returns The tokenized string.
|
||||
static cpp_token_string tokenize(std::string str);
|
||||
|
||||
/// \effects Creates it from a sequence of tokens.
|
||||
cpp_token_string(std::vector<cpp_token> tokens) : tokens_(std::move(tokens)) {}
|
||||
|
||||
/// \effects Creates from a string.
|
||||
/// \notes This does not do tokenization, it will only store a single, unknown token!
|
||||
static cpp_token_string from_string(std::string str)
|
||||
{
|
||||
return cpp_token_string({cpp_token(cpp_token_kind::unknown, std::move(str))});
|
||||
}
|
||||
|
||||
/// \exclude target
|
||||
using iterator = std::vector<cpp_token>::const_iterator;
|
||||
|
||||
|
|
|
|||
|
|
@ -1158,7 +1158,7 @@ void detail::write_template_arguments(
|
|||
|
||||
void detail::write_token_string(code_generator::output& output, const cpp_token_string& tokens)
|
||||
{
|
||||
auto last_kind = cpp_token_kind::unknown;
|
||||
auto last_kind = cpp_token_kind::punctuation; // neutral regarding whitespace
|
||||
for (auto& token : tokens)
|
||||
{
|
||||
switch (token.kind)
|
||||
|
|
@ -1177,14 +1177,15 @@ void detail::write_token_string(code_generator::output& output, const cpp_token_
|
|||
output << operator_ws;
|
||||
break;
|
||||
|
||||
case cpp_token_kind::literal:
|
||||
// determine kind of literal
|
||||
if (token.spelling.front() == '\"')
|
||||
output << string_literal(token.spelling);
|
||||
else if (token.spelling.find('.') != std::string::npos)
|
||||
output << float_literal(token.spelling);
|
||||
else
|
||||
output << int_literal(token.spelling);
|
||||
case cpp_token_kind::int_literal:
|
||||
output << int_literal(token.spelling);
|
||||
break;
|
||||
case cpp_token_kind::float_literal:
|
||||
output << float_literal(token.spelling);
|
||||
break;
|
||||
case cpp_token_kind::char_literal:
|
||||
case cpp_token_kind::string_literal:
|
||||
output << string_literal(token.spelling);
|
||||
break;
|
||||
|
||||
case cpp_token_kind::punctuation:
|
||||
|
|
@ -1206,9 +1207,6 @@ void detail::write_token_string(code_generator::output& output, const cpp_token_
|
|||
else
|
||||
output << punctuation(token.spelling);
|
||||
break;
|
||||
|
||||
case cpp_token_kind::unknown:
|
||||
output << token_seq(token.spelling);
|
||||
}
|
||||
|
||||
last_kind = token.kind;
|
||||
|
|
|
|||
|
|
@ -6,6 +6,9 @@
|
|||
|
||||
#include <algorithm>
|
||||
#include <cctype>
|
||||
#include <cstring>
|
||||
#include <type_safe/optional.hpp>
|
||||
|
||||
#include <cppast/detail/assert.hpp>
|
||||
|
||||
using namespace cppast;
|
||||
|
|
@ -16,6 +19,586 @@ void cpp_token_string::builder::unmunch()
|
|||
tokens_.back().spelling = ">";
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
/// \returns Whether the NUL-terminated string at `ptr` begins with the characters of `str`.
/// \notes At most `N - 1` characters are compared, i.e. the terminating NUL of a string literal is not part of the prefix.
template <std::size_t N>
bool starts_with(const char* ptr, const char (&str)[N])
{
    const std::size_t prefix_length = N - 1u;
    const auto        difference    = std::strncmp(ptr, str, prefix_length);
    return difference == 0;
}
|
||||
|
||||
/// \returns Whether the NUL-terminated string at `ptr` begins with the contents of `str`.
bool starts_with(const char* ptr, const std::string& str)
{
    const auto prefix_length = str.size();
    const auto difference    = std::strncmp(ptr, str.c_str(), prefix_length);
    return difference == 0;
}
|
||||
|
||||
/// \effects If the string at `ptr` begins with the characters of `str` (excluding its terminating NUL),
/// advances `ptr` past them.
/// \returns Whether `ptr` was advanced.
template <std::size_t N>
bool bump_if(const char*& ptr, const char (&str)[N])
{
    // same prefix comparison as the literal overload of starts_with()
    if (std::strncmp(ptr, str, N - 1u) != 0)
        return false;

    ptr += N - 1;
    return true;
}
|
||||
|
||||
/// \effects If the string at `ptr` begins with the contents of `str`, advances `ptr` past them.
/// \returns Whether `ptr` was advanced.
bool bump_if(const char*& ptr, const std::string& str)
{
    // same prefix comparison as the std::string overload of starts_with()
    const bool matches = std::strncmp(ptr, str.c_str(), str.size()) == 0;
    if (matches)
        ptr += str.size();
    return matches;
}
|
||||
|
||||
/// \returns Whether `c` is an ASCII letter or an underscore.
/// \notes Universal character names (`\uXXXX`) are technically allowed in identifiers as well,
/// but they are not recognized here.
bool is_identifier_nondigit(char c)
{
    // assume ASCII
    const bool is_lower = 'a' <= c && c <= 'z';
    const bool is_upper = 'A' <= c && c <= 'Z';
    return is_lower || is_upper || c == '_';
}
|
||||
|
||||
/// \returns Whether `c` is one of the decimal digits `0` to `9`.
bool is_digit(char c)
{
    const bool out_of_range = c < '0' || c > '9';
    return !out_of_range;
}
|
||||
|
||||
/// \returns Whether `c` is a decimal digit or one of the letters `a` to `f` in either case.
bool is_hexadecimal_digit(char c)
{
    if (c >= '0' && c <= '9')
        return true;
    if (c >= 'a' && c <= 'f')
        return true;
    return c >= 'A' && c <= 'F';
}
|
||||
|
||||
/// \effects Advances `ptr` past the identifier it points to, if any.
/// \returns The identifier, or `nullopt` (leaving `ptr` unchanged) if the first character cannot start one.
type_safe::optional<std::string> bump_identifier(const char*& ptr)
{
    if (!is_identifier_nondigit(*ptr))
        return type_safe::nullopt;

    // the first character is known to be valid, digits are allowed from the second one on
    const char* const begin = ptr;
    do
    {
        ++ptr;
    } while (is_identifier_nondigit(*ptr) || is_digit(*ptr));

    return std::string(begin, ptr);
}
|
||||
|
||||
/// \effects Consumes the identifier at `ptr`, if any, and classifies it.
/// \returns A keyword token if the identifier is a keyword,
/// a punctuation token spelled as the symbol if it is an alternative operator spelling (e.g. `and` yields `&&`),
/// an identifier token otherwise,
/// or `nullopt` if `ptr` does not point to an identifier.
type_safe::optional<cpp_token> identifier_token(const char*& ptr)
{
    auto identifier = bump_identifier(ptr);
    if (!identifier)
        return type_safe::nullopt;
    const std::string& spelling = identifier.value();

    static constexpr const char* keywords[] = {
        "alignas",      "alignof",  "asm",        "auto",          "bool",
        "break",        "case",     "catch",      "char",          "char16_t",
        "char32_t",     "class",    "const",      "constexpr",     "const_cast",
        "continue",     "decltype", "default",    "delete",        "do",
        "double",       "dynamic_cast", "else",   "enum",          "explicit",
        "export",       "extern",   "false",      "float",         "for",
        "friend",       "goto",     "if",         "inline",        "int",
        "long",         "mutable",  "namespace",  "new",           "noexcept",
        "nullptr",      "operator", "private",    "protected",     "public",
        "register",     "reinterpret_cast", "return", "short",     "signed",
        "sizeof",       "static",   "static_assert", "static_cast", "struct",
        "switch",       "template", "this",       "thread_local",  "throw",
        "true",         "try",      "typedef",    "typeid",        "typename",
        "union",        "unsigned", "using",      "virtual",       "void",
        "volatile",     "wchar_t",  "while"};

    // alternative operator spellings and the punctuation they stand for
    struct alternative_token
    {
        const char* name;
        const char* punctuation;
    };
    static constexpr alternative_token alternatives[] = {
        {"and", "&&"},   {"and_eq", "&="}, {"bitand", "&"}, {"bitor", "|"},
        {"compl", "~"},  {"not", "!"},     {"not_eq", "!="}, {"or", "||"},
        {"or_eq", "|="}, {"xor", "^"},     {"xor_eq", "^="}};

    const auto is_spelling = [&spelling](const char* candidate) { return spelling == candidate; };
    if (std::any_of(std::begin(keywords), std::end(keywords), is_spelling))
        return cpp_token(cpp_token_kind::keyword, spelling);

    for (const auto& alternative : alternatives)
        if (spelling == alternative.name)
            return cpp_token(cpp_token_kind::punctuation, alternative.punctuation);

    return cpp_token(cpp_token_kind::identifier, spelling);
}
|
||||
|
||||
void append_udl_suffix(std::string& literal, const char*& ptr)
|
||||
{
|
||||
if (auto id = identifier_token(ptr))
|
||||
literal += id.value().spelling;
|
||||
}
|
||||
|
||||
template <typename DigitPredicate>
|
||||
std::string parse_digit_sequence(const char*& ptr, DigitPredicate is_digit)
|
||||
{
|
||||
std::string result;
|
||||
for (; is_digit(*ptr) || *ptr == '\''; ++ptr)
|
||||
if (*ptr != '\'')
|
||||
result += *ptr;
|
||||
DEBUG_ASSERT(result.back() != '\'', detail::assert_handler{});
|
||||
return result;
|
||||
}
|
||||
|
||||
void append_integer_suffix(std::string& literal, const char*& ptr)
|
||||
{
|
||||
auto append_unsigned_suffix = [](std::string& literal, const char*& ptr) {
|
||||
if (*ptr == 'u' || *ptr == 'U')
|
||||
{
|
||||
literal += *ptr++;
|
||||
return true;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
};
|
||||
auto append_long_suffix = [](std::string& literal, const char*& ptr) {
|
||||
if (starts_with(ptr, "ll") || starts_with(ptr, "LL"))
|
||||
{
|
||||
literal += *ptr++;
|
||||
literal += *ptr++;
|
||||
return true;
|
||||
}
|
||||
else if (*ptr == 'l' || *ptr == 'L')
|
||||
{
|
||||
literal += *ptr++;
|
||||
return true;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
};
|
||||
|
||||
if (append_unsigned_suffix(literal, ptr))
|
||||
append_long_suffix(literal, ptr);
|
||||
else if (append_long_suffix(literal, ptr))
|
||||
append_unsigned_suffix(literal, ptr);
|
||||
else
|
||||
append_udl_suffix(literal, ptr);
|
||||
}
|
||||
|
||||
void append_floating_point_suffix(std::string& literal, const char*& ptr)
|
||||
{
|
||||
if (*ptr == 'f' || *ptr == 'F')
|
||||
literal += *ptr++;
|
||||
else if (*ptr == 'l' || *ptr == 'L')
|
||||
literal += *ptr++;
|
||||
else
|
||||
append_udl_suffix(literal, ptr);
|
||||
}
|
||||
|
||||
/// \effects Consumes a floating point exponent at `ptr`, if any:
/// one of `e`, `E`, `p`, `P`, an optional sign and a sequence of decimal digits.
/// \returns The exponent including its introducing character, or `nullopt` if `ptr` does not point to one.
type_safe::optional<std::string> parse_floating_point_exponent(const char*& ptr)
{
    switch (*ptr)
    {
    case 'e':
    case 'E':
    case 'p':
    case 'P':
        break;

    default:
        return type_safe::nullopt;
    }

    std::string exponent(1u, *ptr++);
    if (*ptr == '+' || *ptr == '-')
        exponent += *ptr++;
    exponent += parse_digit_sequence(ptr, &is_digit);

    return exponent;
}
|
||||
|
||||
/// \effects Consumes the numeric literal at `ptr`, if any.
/// Binary (`0b`), hexadecimal (`0x`), octal and decimal integer literals are handled,
/// as well as hexadecimal and decimal floating point literals, including suffixes.
/// Digit separators are not part of the resulting spelling.
/// \returns An `int_literal` or `float_literal` token,
/// or `nullopt` if `ptr` neither points to a digit nor to a `.` followed by a digit.
type_safe::optional<cpp_token> numeric_literal_token(const char*& ptr)
{
    // Shared tail of hexadecimal and decimal literals:
    // optional fraction, optional exponent and the suffix matching the resulting kind.
    // The fraction must use the same digit set as the integral part,
    // otherwise the `e` of a decimal exponent is consumed as a hexadecimal digit
    // and `1.5e-2` is split into `1.5e`, `-` and `2`.
    const auto finish_literal = [&ptr](std::string& result, bool (*is_fraction_digit)(char)) {
        auto is_float = false;
        if (*ptr == '.')
        {
            // floating point fraction
            is_float = true;
            result += *ptr++;
            result += parse_digit_sequence(ptr, is_fraction_digit);
        }

        if (auto exp = parse_floating_point_exponent(ptr))
        {
            // floating point exponent
            is_float = true;
            result += exp.value();
        }

        if (is_float)
            append_floating_point_suffix(result, ptr);
        else
            append_integer_suffix(result, ptr);

        return cpp_token(is_float ? cpp_token_kind::float_literal : cpp_token_kind::int_literal,
                         result);
    };

    if (starts_with(ptr, "0b") || starts_with(ptr, "0B")) // binary integer literal
    {
        std::string result;
        result += *ptr++;
        result += *ptr++;
        result += parse_digit_sequence(ptr, [](char c) { return c == '0' || c == '1'; });
        append_integer_suffix(result, ptr);
        return cpp_token(cpp_token_kind::int_literal, result);
    }
    else if (starts_with(ptr, "0x") || starts_with(ptr, "0X")) // hexadecimal literal
    {
        std::string result;
        result += *ptr++;
        result += *ptr++;
        result += parse_digit_sequence(ptr, &is_hexadecimal_digit);
        return finish_literal(result, &is_hexadecimal_digit);
    }
    else if (is_digit(*ptr)) // octal and decimal literals
    {
        std::string result;
        result += parse_digit_sequence(ptr, &is_digit);
        return finish_literal(result, &is_digit);
    }
    else if (*ptr == '.' && is_digit(ptr[1])) // floating point literal without integral part
    {
        std::string result;

        // floating point fraction
        result += *ptr++;
        result += parse_digit_sequence(ptr, &is_digit);

        if (auto exp = parse_floating_point_exponent(ptr))
            result += exp.value();

        append_floating_point_suffix(result, ptr);
        return cpp_token(cpp_token_kind::float_literal, result);
    }
    else
        return type_safe::nullopt;
}
|
||||
|
||||
/// \effects Consumes one of the encoding prefixes `u8`, `u`, `U`, `L` at `ptr`, if present.
/// \returns The consumed prefix, or `nullopt` if there is none.
type_safe::optional<std::string> parse_encoding_prefix(const char*& ptr)
{
    // "u8" has to be tried before "u"
    static constexpr const char* prefixes[] = {"u8", "u", "U", "L"};

    for (auto prefix : prefixes)
        if (bump_if(ptr, prefix))
            return std::string(prefix);

    return type_safe::nullopt;
}
|
||||
|
||||
/// \effects Consumes the character literal at `ptr`, if any,
/// including an encoding prefix and the suffix of a user-defined literal.
/// \returns A `char_literal` token, or `nullopt` (with `ptr` restored) if `ptr` does not point to one.
/// \notes The literal must be terminated, this is only checked by an assertion.
type_safe::optional<cpp_token> character_literal(const char*& ptr)
{
    const auto start  = ptr;
    auto       prefix = parse_encoding_prefix(ptr);
    if (*ptr != '\'')
    {
        // not a character literal, give back the prefix that may have been consumed
        ptr = start;
        return type_safe::nullopt;
    }

    std::string spelling = prefix.value_or("");
    spelling += *ptr++; // opening quote

    for (; *ptr != '\''; spelling += *ptr++)
    {
        DEBUG_ASSERT(*ptr, detail::assert_handler{});

        // copy the backslash here, so the escaped character is never treated as the closing quote
        if (*ptr == '\\')
            spelling += *ptr++;
    }
    spelling += *ptr++; // closing quote

    append_udl_suffix(spelling, ptr);
    return cpp_token(cpp_token_kind::char_literal, spelling);
}
|
||||
|
||||
/// \effects Consumes the regular or raw string literal at `ptr`, if any,
/// including an encoding prefix and the suffix of a user-defined literal.
/// \returns A `string_literal` token, or `nullopt` (with `ptr` restored) if `ptr` does not point to one.
/// \notes The literal must be terminated, this is only checked by assertions.
type_safe::optional<cpp_token> string_literal(const char*& ptr)
{
    auto save   = ptr;
    auto prefix = parse_encoding_prefix(ptr);
    if (starts_with(ptr, "R\""))
    {
        // raw string literal
        auto result = prefix.value_or("");
        result += *ptr++;
        result += *ptr++;

        // the literal ends at `)delimiter"`, where delimiter is the text between `R"` and `(`
        std::string terminator;
        terminator += ")";
        while (*ptr != '(')
        {
            // check the character, not the pointer, to detect the end of the input
            DEBUG_ASSERT(*ptr, detail::assert_handler{});
            result += *ptr;
            terminator += *ptr++;
        }
        result += *ptr++;
        terminator += '"';

        while (!bump_if(ptr, terminator))
        {
            // check the character, not the pointer, to detect the end of the input
            DEBUG_ASSERT(*ptr, detail::assert_handler{});
            result += *ptr++;
        }
        result += terminator;

        append_udl_suffix(result, ptr);
        return cpp_token(cpp_token_kind::string_literal, result);
    }
    else if (starts_with(ptr, "\""))
    {
        // regular string literal
        auto result = prefix.value_or("");
        result += *ptr++;

        while (*ptr != '"')
        {
            DEBUG_ASSERT(*ptr, detail::assert_handler{});

            // copy the backslash here, so the escaped character is never treated as the closing quote
            if (*ptr == '\\')
                result += *ptr++;
            result += *ptr++;
        }
        result += *ptr++;

        append_udl_suffix(result, ptr);
        return cpp_token(cpp_token_kind::string_literal, result);
    }
    else
    {
        // not a string literal, give back the prefix that may have been consumed
        ptr = save;
        return type_safe::nullopt;
    }
}
|
||||
|
||||
/// \effects Consumes the digraph at `ptr`, if any.
/// \returns A punctuation token spelled as the regular token the digraph stands for,
/// or `nullopt` if `ptr` does not point to a digraph.
type_safe::optional<cpp_token> digraph_token(const char*& ptr)
{
    // don't detect digraph in std::vector<::std::string>
    const bool is_scope_after_angle = starts_with(ptr, "<::") && ptr[3] != ':' && ptr[3] != '>';
    if (is_scope_after_angle)
        return type_safe::nullopt;

    struct digraph
    {
        const char* spelling;
        const char* replacement;
    };
    // "%:%:" has to be tried before "%:"
    static constexpr digraph digraphs[] = {{"<%", "{"}, {"%>", "}"},    {"<:", "["},
                                           {":>", "]"}, {"%:%:", "##"}, {"%:", "#"}};

    for (const auto& entry : digraphs)
        if (bump_if(ptr, entry.spelling))
            return cpp_token(cpp_token_kind::punctuation, entry.replacement);

    return type_safe::nullopt;
}
|
||||
|
||||
/// \effects Consumes the punctuation at `ptr`, if any.
/// \returns The punctuation token, or `nullopt` if `ptr` does not point to punctuation.
/// \notes The candidates are tried in order,
/// so a token is always listed before the tokens that are a prefix of it.
type_safe::optional<cpp_token> punctuation_token(const char*& ptr)
{
    static constexpr const char* punctuations[] = {
        // tokens starting with #
        "##", "#",
        // tokens starting with .
        "...", ".*", ".",
        // tokens starting with :
        "::", ":",
        // tokens starting with +
        "+=", "++", "+",
        // tokens starting with -
        "->*", "->", "--", "-=", "-",
        // tokens starting with *
        "*=", "*",
        // tokens starting with /
        "/=", "/",
        // tokens starting with %
        "%=", "%",
        // tokens starting with ^
        "^=", "^",
        // tokens starting with &
        "&=", "&&", "&",
        // tokens starting with |
        "|=", "||", "|",
        // tokens starting with <
        "<<=", "<<", "<=", "<",
        // tokens starting with >
        ">>=", ">>", ">=", ">",
        // tokens starting with !
        "!=", "!",
        // tokens starting with =
        "==", "=",
        // single tokens
        "~", ";", "?", ",", "{", "}", "[", "]", "(", ")",
    };

    for (const char* candidate : punctuations)
    {
        const auto length = std::strlen(candidate);
        if (std::strncmp(ptr, candidate, length) != 0)
            continue;

        ptr += length;
        return cpp_token(cpp_token_kind::punctuation, candidate);
    }

    return type_safe::nullopt;
}
|
||||
}
|
||||
|
||||
/// \effects Splits the string into tokens and skips the whitespace between them.
/// Numeric, character and string literals are tried first, then digraphs, punctuation and identifiers.
/// \returns The token string built from all tokens in order.
/// \notes A character that cannot start any token violates the precondition and triggers an assertion.
cpp_token_string cpp_token_string::tokenize(std::string str)
{
    cpp_token_string::builder builder;

    auto ptr = str.c_str();
    while (*ptr)
    {
        if (auto num = numeric_literal_token(ptr))
            builder.add_token(num.value());
        else if (auto char_lit = character_literal(ptr))
            builder.add_token(char_lit.value());
        else if (auto str_lit = string_literal(ptr))
            builder.add_token(str_lit.value());
        else if (auto digraphs = digraph_token(ptr))
            builder.add_token(digraphs.value());
        else if (auto punct = punctuation_token(ptr))
            builder.add_token(punct.value());
        else if (auto id = identifier_token(ptr))
            builder.add_token(id.value());
        else if (*ptr == ' ' || *ptr == '\t' || *ptr == '\n' || *ptr == '\r' || *ptr == '\v'
                 || *ptr == '\f')
            // whitespace only separates tokens; vertical tab and form feed are whitespace as well
            ++ptr;
        else
        {
            // Skip the offending character before reporting it,
            // so the loop still terminates if the assertion macro expands to nothing.
            // NOTE(review): if DEBUG_UNREACHABLE marks the path unreachable when disabled,
            // this branch is still undefined behavior - confirm against the macro definition.
            ++ptr;
            DEBUG_UNREACHABLE(detail::assert_handler{});
        }
    }

    return builder.finish();
}
|
||||
|
||||
namespace
|
||||
{
|
||||
bool is_identifier(char c)
|
||||
|
|
|
|||
|
|
@ -412,9 +412,9 @@ bool detail::skip_attribute(detail::cxtoken_stream& stream)
|
|||
|
||||
namespace
|
||||
{
|
||||
cpp_token_kind get_kind(CXTokenKind kind)
|
||||
cpp_token_kind get_kind(const detail::cxtoken& token)
|
||||
{
|
||||
switch (kind)
|
||||
switch (token.kind())
|
||||
{
|
||||
case CXToken_Punctuation:
|
||||
return cpp_token_kind::punctuation;
|
||||
|
|
@ -422,14 +422,26 @@ namespace
|
|||
return cpp_token_kind::keyword;
|
||||
case CXToken_Identifier:
|
||||
return cpp_token_kind::identifier;
|
||||
|
||||
case CXToken_Literal:
|
||||
return cpp_token_kind::literal;
|
||||
{
|
||||
auto spelling = token.value().std_str();
|
||||
if (spelling.find('.') != std::string::npos)
|
||||
return cpp_token_kind::float_literal;
|
||||
else if (std::isdigit(spelling.front()))
|
||||
return cpp_token_kind::int_literal;
|
||||
else if (spelling.back() == '\'')
|
||||
return cpp_token_kind::char_literal;
|
||||
else
|
||||
return cpp_token_kind::string_literal;
|
||||
}
|
||||
|
||||
case CXToken_Comment:
|
||||
break;
|
||||
}
|
||||
|
||||
DEBUG_UNREACHABLE(detail::assert_handler{});
|
||||
return cpp_token_kind ::literal;
|
||||
return cpp_token_kind::punctuation;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -440,7 +452,7 @@ cpp_token_string detail::to_string(cxtoken_stream& stream, cxtoken_iterator end)
|
|||
while (stream.cur() != end)
|
||||
{
|
||||
auto& token = stream.get();
|
||||
builder.add_token(cpp_token(get_kind(token.kind()), token.c_str()));
|
||||
builder.add_token(cpp_token(get_kind(token), token.c_str()));
|
||||
}
|
||||
|
||||
if (stream.unmunch())
|
||||
|
|
|
|||
|
|
@ -246,7 +246,7 @@ namespace
|
|||
return size_expr.empty() ?
|
||||
nullptr :
|
||||
cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_ulonglong),
|
||||
cpp_token_string::from_string(
|
||||
cpp_token_string::tokenize(
|
||||
std::string(size_expr.rbegin(),
|
||||
size_expr.rend())));
|
||||
}
|
||||
|
|
@ -488,7 +488,7 @@ namespace
|
|||
|
||||
return cpp_decltype_type::build(
|
||||
cpp_unexposed_expression::build(cpp_unexposed_type::build("<decltype>"),
|
||||
cpp_token_string::from_string(spelling)));
|
||||
cpp_token_string::tokenize(spelling)));
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@ set(tests
|
|||
cpp_preprocessor.cpp
|
||||
cpp_static_assert.cpp
|
||||
cpp_template_parameter.cpp
|
||||
cpp_token.cpp
|
||||
cpp_type_alias.cpp
|
||||
cpp_variable.cpp
|
||||
integration.cpp
|
||||
|
|
|
|||
|
|
@ -104,7 +104,7 @@ void ns::l()
|
|||
*cpp_unexposed_expression::
|
||||
build(cpp_pointer_type::build(
|
||||
cpp_builtin_type::build(cpp_float)),
|
||||
cpp_token_string::from_string("nullptr"))));
|
||||
cpp_token_string::tokenize("nullptr"))));
|
||||
}
|
||||
else
|
||||
REQUIRE(false);
|
||||
|
|
@ -135,7 +135,7 @@ void ns::l()
|
|||
*cpp_decltype_type::build(
|
||||
cpp_unexposed_expression::
|
||||
build(cpp_builtin_type::build(cpp_int),
|
||||
cpp_token_string::from_string("42")))));
|
||||
cpp_token_string::tokenize("42")))));
|
||||
REQUIRE(!param.default_value());
|
||||
}
|
||||
else
|
||||
|
|
@ -162,16 +162,17 @@ void ns::l()
|
|||
equal_expressions(func.noexcept_condition().value(),
|
||||
*cpp_literal_expression::build(std::move(bool_t), "true")));
|
||||
else if (func.name() == "e")
|
||||
REQUIRE(equal_expressions(func.noexcept_condition().value(),
|
||||
*cpp_unexposed_expression::
|
||||
build(std::move(bool_t),
|
||||
cpp_token_string::from_string("false"))));
|
||||
REQUIRE(
|
||||
equal_expressions(func.noexcept_condition().value(),
|
||||
*cpp_unexposed_expression::build(std::move(bool_t),
|
||||
cpp_token_string::tokenize(
|
||||
"false"))));
|
||||
else if (func.name() == "f")
|
||||
REQUIRE(
|
||||
equal_expressions(func.noexcept_condition().value(),
|
||||
*cpp_unexposed_expression::
|
||||
build(std::move(bool_t),
|
||||
cpp_token_string::from_string("noexcept(d())"))));
|
||||
*cpp_unexposed_expression::build(std::move(bool_t),
|
||||
cpp_token_string::tokenize(
|
||||
"noexcept(d())"))));
|
||||
}
|
||||
else if (func.name() == "g" || func.name() == "h" || func.name() == "i"
|
||||
|| func.name() == "j")
|
||||
|
|
|
|||
|
|
@ -402,12 +402,11 @@ d::~d() {}
|
|||
REQUIRE(!dtor.is_virtual());
|
||||
REQUIRE(dtor.body_kind() == cpp_function_definition);
|
||||
REQUIRE(dtor.noexcept_condition());
|
||||
REQUIRE(
|
||||
equal_expressions(dtor.noexcept_condition().value(),
|
||||
*cpp_unexposed_expression::build(cpp_builtin_type::build(
|
||||
cpp_bool),
|
||||
cpp_token_string::from_string(
|
||||
"false"))));
|
||||
REQUIRE(equal_expressions(dtor.noexcept_condition().value(),
|
||||
*cpp_unexposed_expression::build(cpp_builtin_type::build(
|
||||
cpp_bool),
|
||||
cpp_token_string::tokenize(
|
||||
"false"))));
|
||||
}
|
||||
else if (dtor.name() == "~c")
|
||||
{
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ struct foo
|
|||
|
||||
// all initializers are unexposed
|
||||
auto def = cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_float),
|
||||
cpp_token_string::from_string("3.14f"));
|
||||
cpp_token_string::tokenize("3.14f"));
|
||||
REQUIRE(var.default_value());
|
||||
REQUIRE(equal_expressions(var.default_value().value(), *def));
|
||||
|
||||
|
|
|
|||
|
|
@ -34,17 +34,15 @@ struct foo
|
|||
REQUIRE(equal_expressions(assert.expression(),
|
||||
*cpp_literal_expression::build(std::move(bool_t), "true")));
|
||||
else if (assert.message() == "a")
|
||||
REQUIRE(
|
||||
equal_expressions(assert.expression(),
|
||||
*cpp_unexposed_expression::build(std::move(bool_t),
|
||||
cpp_token_string::from_string(
|
||||
"true||false"))));
|
||||
REQUIRE(equal_expressions(assert.expression(),
|
||||
*cpp_unexposed_expression::build(std::move(bool_t),
|
||||
cpp_token_string::tokenize(
|
||||
"true||false"))));
|
||||
else if (assert.message() == "b")
|
||||
REQUIRE(
|
||||
equal_expressions(assert.expression(),
|
||||
*cpp_unexposed_expression::build(std::move(bool_t),
|
||||
cpp_token_string::from_string(
|
||||
"!B"))));
|
||||
REQUIRE(equal_expressions(assert.expression(),
|
||||
*cpp_unexposed_expression::build(std::move(bool_t),
|
||||
cpp_token_string::tokenize(
|
||||
"!B"))));
|
||||
else
|
||||
REQUIRE(false);
|
||||
});
|
||||
|
|
|
|||
|
|
@ -151,13 +151,13 @@ using d = void;
|
|||
cpp_builtin_type::build(cpp_char))));
|
||||
REQUIRE(!param.is_variadic());
|
||||
REQUIRE(param.default_value());
|
||||
REQUIRE(equal_expressions(param.default_value().value(),
|
||||
*cpp_unexposed_expression::
|
||||
build(cpp_builtin_type::build(
|
||||
cpp_nullptr),
|
||||
cpp_token_string::
|
||||
from_string(
|
||||
"nullptr"))));
|
||||
REQUIRE(
|
||||
equal_expressions(param.default_value().value(),
|
||||
*cpp_unexposed_expression::
|
||||
build(cpp_builtin_type::build(
|
||||
cpp_nullptr),
|
||||
cpp_token_string::tokenize(
|
||||
"nullptr"))));
|
||||
}
|
||||
else if (param.name() == "C")
|
||||
{
|
||||
|
|
|
|||
129
test/cpp_token.cpp
Normal file
129
test/cpp_token.cpp
Normal file
|
|
@ -0,0 +1,129 @@
|
|||
// Copyright (C) 2017 Jonathan Müller <jonathanmueller.dev@gmail.com>
|
||||
// This file is subject to the license terms in the LICENSE file
|
||||
// found in the top-level directory of this distribution.
|
||||
|
||||
#include <cppast/cpp_token.hpp>
|
||||
|
||||
#include <catch.hpp>
|
||||
|
||||
#include <algorithm>
|
||||
#include <initializer_list>
|
||||
|
||||
using namespace cppast;
|
||||
|
||||
void check_equal_tokens(const std::string& str, std::initializer_list<cpp_token> tokens)
|
||||
{
|
||||
auto token_str = cpp_token_string::tokenize(str);
|
||||
INFO(str);
|
||||
REQUIRE(token_str.end() - token_str.begin() == tokens.size());
|
||||
REQUIRE(std::equal(token_str.begin(), token_str.end(), tokens.begin()));
|
||||
}
|
||||
|
||||
TEST_CASE("tokenizer")
{
    // shorthands creating a token of the respective kind
    const auto int_lit = [](const char* spelling) {
        return cpp_token(cpp_token_kind::int_literal, spelling);
    };
    const auto ident = [](const char* spelling) {
        return cpp_token(cpp_token_kind::identifier, spelling);
    };
    const auto keyword = [](const char* spelling) {
        return cpp_token(cpp_token_kind::keyword, spelling);
    };
    const auto punct = [](const char* spelling) {
        return cpp_token(cpp_token_kind::punctuation, spelling);
    };
    // checks that the string is a single token of the given kind that is spelled like the string
    const auto check_verbatim = [](cpp_token_kind kind, const std::string& str) {
        check_equal_tokens(str, {cpp_token(kind, str)});
    };

    SECTION("integer literals")
    {
        check_equal_tokens(" 1234 ", {int_lit("1234")});
        check_equal_tokens("1, 2", {int_lit("1"), punct(","), int_lit("2")});

        // integer suffixes
        check_verbatim(cpp_token_kind::int_literal, "1234ul");
        check_equal_tokens("12'34LU", {int_lit("1234LU")});

        // other integer formats
        for (auto str : {"01234", "0x1234AF", "0b101101"})
            check_verbatim(cpp_token_kind::int_literal, str);
    }
    SECTION("floating point literals")
    {
        // floating point suffixes
        for (auto str : {"3.14", "3.14f", "3.14L"})
            check_verbatim(cpp_token_kind::float_literal, str);

        // missing parts
        for (auto str : {".5", "1."})
            check_verbatim(cpp_token_kind::float_literal, str);

        // exponents
        for (auto str : {"1.0e4", "1e4", ".5e-2"})
            check_verbatim(cpp_token_kind::float_literal, str);

        // hexadecimal
        for (auto str : {"0xabc.def", "0x123p42"})
            check_verbatim(cpp_token_kind::float_literal, str);
    }
    SECTION("character literals")
    {
        for (auto str : {R"('a')", R"(u8'a')", R"(U'a')", R"('\'')"})
            check_verbatim(cpp_token_kind::char_literal, str);
    }
    SECTION("string literals")
    {
        check_verbatim(cpp_token_kind::string_literal, R"("hello")");
        check_verbatim(cpp_token_kind::string_literal, R"(u8"he\"llo")");

        // raw string literals
        check_verbatim(cpp_token_kind::string_literal, R"*(R"(hel\"lo)")*");
        check_verbatim(cpp_token_kind::string_literal, R"**(R"*(hello R"(foo)")*")**");
    }
    SECTION("UDLs")
    {
        check_verbatim(cpp_token_kind::int_literal, "123_foo");
        check_verbatim(cpp_token_kind::float_literal, "123.456_foo");
        check_verbatim(cpp_token_kind::string_literal, R"("hi"_foo)");
    }
    SECTION("identifiers")
    {
        check_equal_tokens("foo bar baz_a", {ident("foo"), ident("bar"), ident("baz_a")});
        check_verbatim(cpp_token_kind::identifier, "constant");
    }
    SECTION("keywords")
    {
        // just test some
        check_equal_tokens("const float auto",
                           {keyword("const"), keyword("float"), keyword("auto")});
    }
    SECTION("punctuations")
    {
        // just test munch things
        check_equal_tokens("<< <= <", {punct("<<"), punct("<="), punct("<")});
        check_equal_tokens("- -- -> ->*", {punct("-"), punct("--"), punct("->"), punct("->*")});
        check_equal_tokens("--->>>>", {punct("--"), punct("->"), punct(">>"), punct(">")});

        // alternative spellings
        check_equal_tokens("and not xor", {punct("&&"), punct("!"), punct("^")});

        // digraphs
        check_equal_tokens("<% foo<::bar>", {punct("{"), ident("foo"), punct("<"), punct("::"),
                                             ident("bar"), punct(">")});
    }
}
|
||||
|
|
@ -334,7 +334,7 @@ typedef decltype(0) w;
|
|||
return cpp_literal_expression::build(std::move(type), std::move(size));
|
||||
else
|
||||
return cpp_unexposed_expression::build(std::move(type),
|
||||
cpp_token_string::from_string(std::move(size)));
|
||||
cpp_token_string::tokenize(std::move(size)));
|
||||
};
|
||||
|
||||
cpp_entity_index idx;
|
||||
|
|
@ -507,7 +507,7 @@ typedef decltype(0) w;
|
|||
{
|
||||
auto type = cpp_decltype_type::build(
|
||||
cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_int),
|
||||
cpp_token_string::from_string("0")));
|
||||
cpp_token_string::tokenize("0")));
|
||||
REQUIRE(equal_types(idx, alias.underlying_type(), *type));
|
||||
}
|
||||
else
|
||||
|
|
|
|||
|
|
@ -101,14 +101,13 @@ int r[] = {0};
|
|||
// unexposed due to implicit cast, I think
|
||||
type_safe::ref(
|
||||
*cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_int),
|
||||
cpp_token_string::from_string(
|
||||
"42"))),
|
||||
cpp_token_string::tokenize("42"))),
|
||||
cpp_storage_class_none, false, false);
|
||||
else if (var.name() == "c")
|
||||
check_variable(var, *cpp_builtin_type::build(cpp_float),
|
||||
type_safe::ref(
|
||||
*cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_float),
|
||||
cpp_token_string::from_string(
|
||||
cpp_token_string::tokenize(
|
||||
"3.f+0.14f"))),
|
||||
cpp_storage_class_none, false, false);
|
||||
else if (var.name() == "d")
|
||||
|
|
@ -126,8 +125,7 @@ int r[] = {0};
|
|||
cpp_cv_const),
|
||||
type_safe::ref(
|
||||
*cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_int),
|
||||
cpp_token_string::from_string(
|
||||
"12"))),
|
||||
cpp_token_string::tokenize("12"))),
|
||||
cpp_storage_class_none, true, false);
|
||||
else if (var.name() == "i")
|
||||
{
|
||||
|
|
@ -147,7 +145,7 @@ int r[] = {0};
|
|||
*cpp_unexposed_expression::build(cpp_user_defined_type::build(
|
||||
cpp_type_ref(cpp_entity_id(""),
|
||||
"bar")),
|
||||
cpp_token_string::from_string(
|
||||
cpp_token_string::tokenize(
|
||||
"bar()"))),
|
||||
cpp_storage_class_none, false, false);
|
||||
return false;
|
||||
|
|
@ -169,8 +167,7 @@ int r[] = {0};
|
|||
check_variable(var, *cpp_auto_type::build(),
|
||||
type_safe::ref(
|
||||
*cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_int),
|
||||
cpp_token_string::from_string(
|
||||
"128"))),
|
||||
cpp_token_string::tokenize("128"))),
|
||||
cpp_storage_class_none, false, false);
|
||||
else if (var.name() == "n")
|
||||
check_variable(var,
|
||||
|
|
@ -180,14 +177,13 @@ int r[] = {0};
|
|||
cpp_ref_lvalue),
|
||||
type_safe::ref(
|
||||
*cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_int),
|
||||
cpp_token_string::from_string(
|
||||
"m"))),
|
||||
cpp_token_string::tokenize("m"))),
|
||||
cpp_storage_class_none, false, false);
|
||||
else if (var.name() == "o")
|
||||
check_variable(var,
|
||||
*cpp_decltype_type::build(
|
||||
cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_int),
|
||||
cpp_token_string::from_string("0"))),
|
||||
cpp_token_string::tokenize("0"))),
|
||||
nullptr, cpp_storage_class_none, false, false);
|
||||
else if (var.name() == "p")
|
||||
check_variable(var,
|
||||
|
|
@ -196,13 +192,12 @@ int r[] = {0};
|
|||
build(cpp_decltype_type::build(
|
||||
cpp_unexposed_expression::
|
||||
build(cpp_builtin_type::build(cpp_int),
|
||||
cpp_token_string::from_string("o"))),
|
||||
cpp_token_string::tokenize("o"))),
|
||||
cpp_cv_const),
|
||||
cpp_ref_lvalue),
|
||||
type_safe::ref(
|
||||
*cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_int),
|
||||
cpp_token_string::from_string(
|
||||
"o"))),
|
||||
cpp_token_string::tokenize("o"))),
|
||||
cpp_storage_class_none, false, false);
|
||||
else if (var.name() == "q")
|
||||
check_variable(var,
|
||||
|
|
@ -219,8 +214,7 @@ int r[] = {0};
|
|||
"1")),
|
||||
type_safe::ref(
|
||||
*cpp_unexposed_expression::build(cpp_unexposed_type::build(""),
|
||||
cpp_token_string::from_string(
|
||||
"{0}"))),
|
||||
cpp_token_string::tokenize("{0}"))),
|
||||
cpp_storage_class_none, false, false);
|
||||
else
|
||||
REQUIRE(false);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue