Add function to tokenize strings

This commit is contained in:
Jonathan Müller 2017-10-19 19:02:27 +02:00
commit 1572920650
14 changed files with 798 additions and 83 deletions

View file

@ -15,12 +15,13 @@ namespace cppast
/// The kinds of C++ tokens.
enum class cpp_token_kind
{
identifier, //< Any identifier.
keyword, //< Any keyword.
literal, //< Any literal.
punctuation, //< Any other punctuation.
unknown, //< An unknown token.
identifier, //< Any identifier.
keyword, //< Any keyword.
int_literal, //< An integer literal.
float_literal, //< A floating point literal.
char_literal, //< A character literal.
string_literal, //< A string literal.
punctuation //< Any other punctuation.
};
/// A C++ token.
@ -74,16 +75,15 @@ namespace cppast
std::vector<cpp_token> tokens_;
};
/// Tokenizes a string.
/// \effects Splits the string into C++ tokens.
/// The string must contain valid tokens and must already be preprocessed (i.e. translation phase 6 is already done).
/// \returns The tokenized string.
static cpp_token_string tokenize(std::string str);
/// \effects Creates it from a sequence of tokens.
cpp_token_string(std::vector<cpp_token> tokens) : tokens_(std::move(tokens)) {}
/// \effects Creates from a string.
/// \notes This does not do tokenization, it will only store a single, unknown token!
static cpp_token_string from_string(std::string str)
{
return cpp_token_string({cpp_token(cpp_token_kind::unknown, std::move(str))});
}
/// \exclude target
using iterator = std::vector<cpp_token>::const_iterator;

View file

@ -1158,7 +1158,7 @@ void detail::write_template_arguments(
void detail::write_token_string(code_generator::output& output, const cpp_token_string& tokens)
{
auto last_kind = cpp_token_kind::unknown;
auto last_kind = cpp_token_kind::punctuation; // neutral regarding whitespace
for (auto& token : tokens)
{
switch (token.kind)
@ -1177,14 +1177,15 @@ void detail::write_token_string(code_generator::output& output, const cpp_token_
output << operator_ws;
break;
case cpp_token_kind::literal:
// determine kind of literal
if (token.spelling.front() == '\"')
output << string_literal(token.spelling);
else if (token.spelling.find('.') != std::string::npos)
output << float_literal(token.spelling);
else
output << int_literal(token.spelling);
case cpp_token_kind::int_literal:
output << int_literal(token.spelling);
break;
case cpp_token_kind::float_literal:
output << float_literal(token.spelling);
break;
case cpp_token_kind::char_literal:
case cpp_token_kind::string_literal:
output << string_literal(token.spelling);
break;
case cpp_token_kind::punctuation:
@ -1206,9 +1207,6 @@ void detail::write_token_string(code_generator::output& output, const cpp_token_
else
output << punctuation(token.spelling);
break;
case cpp_token_kind::unknown:
output << token_seq(token.spelling);
}
last_kind = token.kind;

View file

@ -6,6 +6,9 @@
#include <algorithm>
#include <cctype>
#include <cstring>
#include <type_safe/optional.hpp>
#include <cppast/detail/assert.hpp>
using namespace cppast;
@ -16,6 +19,586 @@ void cpp_token_string::builder::unmunch()
tokens_.back().spelling = ">";
}
namespace
{
/// \returns Whether the C string at `ptr` begins with the string literal `str`.
/// \notes `N` includes the literal's terminating null character,
/// so only the first `N - 1` characters are compared.
template <std::size_t N>
bool starts_with(const char* ptr, const char (&str)[N])
{
    const std::size_t length = N - 1u;
    const int         order  = std::strncmp(ptr, str, length);
    return order == 0;
}

/// \returns Whether the C string at `ptr` begins with the contents of `str`.
bool starts_with(const char* ptr, const std::string& str)
{
    const char* const prefix = str.c_str();
    return !std::strncmp(ptr, prefix, str.size());
}
/// \effects If the input at `ptr` begins with the string literal `str`,
/// advances `ptr` past it.
/// \returns Whether `ptr` was advanced.
template <std::size_t N>
bool bump_if(const char*& ptr, const char (&str)[N])
{
    if (!starts_with(ptr, str))
        return false;

    // N counts the terminating null of the literal
    ptr += N - 1;
    return true;
}

/// \effects If the input at `ptr` begins with the contents of `str`,
/// advances `ptr` past it.
/// \returns Whether `ptr` was advanced.
bool bump_if(const char*& ptr, const std::string& str)
{
    const bool matched = starts_with(ptr, str);
    if (matched)
        ptr += str.size();
    return matched;
}
/// \returns Whether `c` is an ASCII letter or an underscore,
/// i.e. a character that may start an identifier.
bool is_identifier_nondigit(char c)
{
    // assume ASCII, where both letter ranges are contiguous
    const bool is_lower = 'a' <= c && c <= 'z';
    const bool is_upper = 'A' <= c && c <= 'Z';

    // technically \uXXX is allowed as well, but I haven't seen that used ever
    return is_lower || is_upper || c == '_';
}
/// \returns Whether `c` is one of the decimal digits `0` to `9`.
bool is_digit(char c)
{
    const bool out_of_range = c < '0' || c > '9';
    return !out_of_range;
}
/// \returns Whether `c` is a decimal digit or one of the letters `a`-`f` / `A`-`F`.
bool is_hexadecimal_digit(char c)
{
    if (is_digit(c))
        return true;

    const bool lower_hex = c >= 'a' && c <= 'f';
    const bool upper_hex = c >= 'A' && c <= 'F';
    return lower_hex || upper_hex;
}
/// \effects If an identifier starts at `ptr`, advances `ptr` past it.
/// \returns The spelling of the identifier, or `nullopt` if the first character cannot start one.
type_safe::optional<std::string> bump_identifier(const char*& ptr)
{
    if (!is_identifier_nondigit(*ptr))
        return type_safe::nullopt;

    // the first character is already known to belong to the identifier,
    // any following letter, underscore or digit does as well
    const char* const first = ptr;
    do
        ++ptr;
    while (is_identifier_nondigit(*ptr) || is_digit(*ptr));

    std::string result(first, ptr);
    return result;
}
/// \effects If an identifier starts at `ptr`, advances `ptr` past it.
/// \returns A keyword token if the identifier is one of the listed keywords,
/// a punctuation token with the primary spelling if it is an alternative token (`and`, `bitor`, ...),
/// an identifier token otherwise, or `nullopt` if no identifier starts at `ptr`.
type_safe::optional<cpp_token> identifier_token(const char*& ptr)
{
    auto identifier = bump_identifier(ptr);
    if (!identifier)
        return type_safe::nullopt;
    const std::string& name = identifier.value();

    static constexpr const char* keywords[] = {
        "alignas", "alignof", "asm", "auto",
        "bool", "break",
        "case", "catch", "char", "char16_t", "char32_t", "class", "const", "constexpr",
        "const_cast", "continue",
        "decltype", "default", "delete", "do", "double", "dynamic_cast",
        "else", "enum", "explicit", "export", "extern",
        "false", "float", "for", "friend",
        "goto",
        "if", "inline", "int",
        "long",
        "mutable",
        "namespace", "new", "noexcept", "nullptr",
        "operator",
        "private", "protected", "public",
        "register", "reinterpret_cast", "return",
        "short", "signed", "sizeof", "static", "static_assert", "static_cast", "struct", "switch",
        "template", "this", "thread_local", "throw", "true", "try", "typedef", "typeid", "typename",
        "union", "unsigned", "using",
        "virtual", "void", "volatile",
        "wchar_t", "while"};

    // alternative tokens and the punctuation they are reported as
    struct alternative_token
    {
        const char* name;
        const char* spelling;
    };
    static constexpr alternative_token alternatives[] = {
        {"and", "&&"},   {"and_eq", "&="}, {"bitand", "&"}, {"bitor", "|"},
        {"compl", "~"},  {"not", "!"},     {"not_eq", "!="}, {"or", "||"},
        {"or_eq", "|="}, {"xor", "^"},     {"xor_eq", "^="}};

    const auto keywords_end = std::end(keywords);
    if (std::find(std::begin(keywords), keywords_end, name) != keywords_end)
        return cpp_token(cpp_token_kind::keyword, name);

    for (const auto& alternative : alternatives)
        if (name == alternative.name)
            return cpp_token(cpp_token_kind::punctuation, alternative.spelling);

    return cpp_token(cpp_token_kind::identifier, name);
}
void append_udl_suffix(std::string& literal, const char*& ptr)
{
if (auto id = identifier_token(ptr))
literal += id.value().spelling;
}
/// Parses a possibly empty sequence of digits with optional `'` digit separators.
/// \effects Advances `ptr` past all characters accepted by `is_digit` and all `'` separators.
/// \returns The digits that were consumed, without the separators.
/// The result is empty if no digit starts at `ptr` (e.g. the fraction of `1.`).
/// \notes A digit sequence must not end with a separator; this is asserted.
template <typename DigitPredicate>
std::string parse_digit_sequence(const char*& ptr, DigitPredicate is_digit)
{
    std::string result;
    // Separators are never stored in `result`, so the last consumed character has to be tracked
    // separately; inspecting `result.back()` would also be undefined for an empty sequence.
    auto ends_with_separator = false;
    for (; is_digit(*ptr) || *ptr == '\''; ++ptr)
    {
        ends_with_separator = *ptr == '\'';
        if (!ends_with_separator)
            result += *ptr;
    }
    DEBUG_ASSERT(!ends_with_separator, detail::assert_handler{});
    return result;
}
void append_integer_suffix(std::string& literal, const char*& ptr)
{
auto append_unsigned_suffix = [](std::string& literal, const char*& ptr) {
if (*ptr == 'u' || *ptr == 'U')
{
literal += *ptr++;
return true;
}
else
return false;
};
auto append_long_suffix = [](std::string& literal, const char*& ptr) {
if (starts_with(ptr, "ll") || starts_with(ptr, "LL"))
{
literal += *ptr++;
literal += *ptr++;
return true;
}
else if (*ptr == 'l' || *ptr == 'L')
{
literal += *ptr++;
return true;
}
else
return false;
};
if (append_unsigned_suffix(literal, ptr))
append_long_suffix(literal, ptr);
else if (append_long_suffix(literal, ptr))
append_unsigned_suffix(literal, ptr);
else
append_udl_suffix(literal, ptr);
}
/// \effects Consumes the suffix of a floating point literal at `ptr` and appends it to `literal`.
/// This is a single `f`, `F`, `l` or `L`, or otherwise a user-defined literal suffix, if there is one.
void append_floating_point_suffix(std::string& literal, const char*& ptr)
{
    switch (*ptr)
    {
    case 'f':
    case 'F':
    case 'l':
    case 'L':
        literal += *ptr++;
        break;

    default:
        append_udl_suffix(literal, ptr);
        break;
    }
}
/// \effects If an exponent marker (`e`, `E`, `p` or `P`) is at `ptr`,
/// consumes it, an optional sign and the following decimal digit sequence.
/// \returns The spelling of the exponent, or `nullopt` if `ptr` is not at an exponent marker.
type_safe::optional<std::string> parse_floating_point_exponent(const char*& ptr)
{
    const char marker       = *ptr;
    const bool has_exponent = marker == 'e' || marker == 'E' || marker == 'p' || marker == 'P';
    if (!has_exponent)
        return type_safe::nullopt;

    std::string exponent(1, marker);
    ++ptr;

    const bool has_sign = *ptr == '+' || *ptr == '-';
    if (has_sign)
        exponent += *ptr++;

    exponent += parse_digit_sequence(ptr, &is_digit);
    return exponent;
}
/// Tries to lex an integer or floating point literal.
/// \effects If a numeric literal starts at `ptr`, advances `ptr` past it, including any suffix.
/// \returns An `int_literal` or `float_literal` token whose spelling has the digit separators removed,
/// or `nullopt` if `ptr` is neither at a digit nor at a `.` followed by a digit.
/// \notes A literal is a floating point literal if it has a `.` or an exponent.
type_safe::optional<cpp_token> numeric_literal_token(const char*& ptr)
{
    if (starts_with(ptr, "0b") || starts_with(ptr, "0B")) // binary integer literal
    {
        std::string result;
        // the two characters of the prefix
        result += *ptr++;
        result += *ptr++;

        result += parse_digit_sequence(ptr, [](char c) { return c == '0' || c == '1'; });
        append_integer_suffix(result, ptr);

        return cpp_token(cpp_token_kind::int_literal, result);
    }
    else if (starts_with(ptr, "0x") || starts_with(ptr, "0X")) // hexadecimal literal
    {
        std::string result;
        // the two characters of the prefix
        result += *ptr++;
        result += *ptr++;

        result += parse_digit_sequence(ptr, &is_hexadecimal_digit);

        auto is_float = false;
        if (*ptr == '.')
        {
            // floating point hexadecimal
            is_float = true;
            result += *ptr++;
            result += parse_digit_sequence(ptr, &is_hexadecimal_digit);
        }

        if (auto exp = parse_floating_point_exponent(ptr))
        {
            // floating point exponent
            is_float = true;
            result += exp.value();
        }

        if (is_float)
            append_floating_point_suffix(result, ptr);
        else
            append_integer_suffix(result, ptr);

        return cpp_token(is_float ? cpp_token_kind::float_literal : cpp_token_kind::int_literal,
                         result);
    }
    else if (is_digit(*ptr)) // octal and decimal literals
    {
        std::string result;
        result += parse_digit_sequence(ptr, &is_digit);

        auto is_float = false;
        if (*ptr == '.')
        {
            // floating point decimal
            is_float = true;
            result += *ptr++;
            // The fraction of a decimal literal consists of decimal digits only.
            // Accepting hexadecimal digits here would swallow the `e` of an exponent,
            // so that `1.0e-4` ends after `1.0e` and the sign starts a new token.
            result += parse_digit_sequence(ptr, &is_digit);
        }

        if (auto exp = parse_floating_point_exponent(ptr))
        {
            // floating point exponent
            is_float = true;
            result += exp.value();
        }

        if (is_float)
            append_floating_point_suffix(result, ptr);
        else
            append_integer_suffix(result, ptr);

        return cpp_token(is_float ? cpp_token_kind::float_literal : cpp_token_kind::int_literal,
                         result);
    }
    else if (*ptr == '.' && is_digit(ptr[1]))
    {
        std::string result;

        // floating point fraction without an integer part
        result += *ptr++;
        result += parse_digit_sequence(ptr, &is_digit);

        if (auto exp = parse_floating_point_exponent(ptr))
            result += exp.value();

        append_floating_point_suffix(result, ptr);

        return cpp_token(cpp_token_kind::float_literal, result);
    }
    else
        return type_safe::nullopt;
}
/// \effects If one of the encoding prefixes `u8`, `u`, `U` or `L` is at `ptr`, advances `ptr` past it.
/// \returns The prefix that was consumed, or `nullopt` if there is none.
type_safe::optional<std::string> parse_encoding_prefix(const char*& ptr)
{
    // `u8` has to be tried before `u`
    static constexpr const char* prefixes[] = {"u8", "u", "U", "L"};

    for (const char* candidate : prefixes)
    {
        if (!bump_if(ptr, candidate))
            continue;

        std::string prefix(candidate);
        return prefix;
    }
    return type_safe::nullopt;
}
/// Tries to lex a character literal with optional encoding prefix and user-defined literal suffix.
/// \effects If a character literal starts at `ptr`, advances `ptr` past it.
/// Otherwise `ptr` is left unchanged, even if an encoding prefix was seen.
/// \returns A `char_literal` token with the full spelling, or `nullopt`.
type_safe::optional<cpp_token> character_literal(const char*& ptr)
{
    const auto start  = ptr;
    auto       prefix = parse_encoding_prefix(ptr);
    if (*ptr != '\'')
    {
        // what looked like a prefix was the start of something else, undo it
        ptr = start;
        return type_safe::nullopt;
    }

    auto spelling = prefix.value_or("");
    spelling += *ptr++; // opening quote

    while (*ptr != '\'')
    {
        // the literal must be terminated before the end of the input
        DEBUG_ASSERT(*ptr, detail::assert_handler{});

        // copy the backslash of an escape sequence together with the escaped character,
        // so that an escaped quote does not end the literal
        const bool is_escape = *ptr == '\\';
        if (is_escape)
            spelling += *ptr++;
        spelling += *ptr++;
    }
    spelling += *ptr++; // closing quote

    append_udl_suffix(spelling, ptr);
    return cpp_token(cpp_token_kind::char_literal, spelling);
}
/// Tries to lex a regular or raw string literal
/// with optional encoding prefix and user-defined literal suffix.
/// \effects If a string literal starts at `ptr`, advances `ptr` past it.
/// Otherwise `ptr` is left unchanged, even if an encoding prefix was seen.
/// \returns A `string_literal` token with the full spelling, or `nullopt`.
/// \notes The literal must be terminated before the end of the input; this is asserted.
type_safe::optional<cpp_token> string_literal(const char*& ptr)
{
    auto save   = ptr;
    auto prefix = parse_encoding_prefix(ptr);
    if (starts_with(ptr, "R\""))
    {
        // raw string literal
        auto result = prefix.value_or("");
        // the `R` and the opening quote
        result += *ptr++;
        result += *ptr++;

        // the literal ends at `)delimiter"`, where the delimiter is everything up to the `(`
        std::string terminator;
        terminator += ")";
        while (*ptr != '(')
        {
            DEBUG_ASSERT(*ptr, detail::assert_handler{});
            result += *ptr;
            terminator += *ptr++;
        }
        result += *ptr++;
        terminator += '"';

        while (!bump_if(ptr, terminator))
        {
            // check the character, not the pointer: the latter is never null
            DEBUG_ASSERT(*ptr, detail::assert_handler{});
            result += *ptr++;
        }
        result += terminator;

        append_udl_suffix(result, ptr);
        return cpp_token(cpp_token_kind::string_literal, result);
    }
    else if (starts_with(ptr, "\""))
    {
        // regular string literal
        auto result = prefix.value_or("");
        result += *ptr++;

        while (*ptr != '"')
        {
            DEBUG_ASSERT(*ptr, detail::assert_handler{});
            // copy the backslash of an escape sequence together with the escaped character,
            // so that an escaped quote does not end the literal
            if (*ptr == '\\')
                result += *ptr++;
            result += *ptr++;
        }
        result += *ptr++;

        append_udl_suffix(result, ptr);
        return cpp_token(cpp_token_kind::string_literal, result);
    }
    else
    {
        // what looked like a prefix was the start of something else, undo it
        ptr = save;
        return type_safe::nullopt;
    }
}
/// \effects If a digraph is at `ptr`, advances `ptr` past it.
/// \returns A punctuation token with the primary spelling of the digraph, or `nullopt`.
type_safe::optional<cpp_token> digraph_token(const char*& ptr)
{
    // don't detect digraph in std::vector<::std::string>,
    // i.e. `<::` that is followed by neither `:` nor `>`
    const bool is_scope_after_angle = starts_with(ptr, "<::") && ptr[3] != ':' && ptr[3] != '>';
    if (is_scope_after_angle)
        return type_safe::nullopt;

    struct digraph
    {
        const char* spelling;
        const char* primary;
    };
    // `%:%:` has to be tried before `%:`
    static constexpr digraph digraphs[] = {{"<%", "{"}, {"%>", "}"},    {"<:", "["},
                                           {":>", "]"}, {"%:%:", "##"}, {"%:", "#"}};

    for (const auto& candidate : digraphs)
        if (bump_if(ptr, candidate.spelling))
            return cpp_token(cpp_token_kind::punctuation, candidate.primary);

    return type_safe::nullopt;
}
/// \effects If a punctuation token is at `ptr`, advances `ptr` past it.
/// \returns The punctuation token, or `nullopt` if none of the known spellings matches.
/// \notes The table is searched front to back and the first match wins,
/// so within each group the longer spellings are listed before their prefixes.
type_safe::optional<cpp_token> punctuation_token(const char*& ptr)
{
    static constexpr const char* punctuations[] = {
        // tokens starting with #
        "##", "#",
        // tokens starting with .
        "...", ".*", ".",
        // tokens starting with :
        "::", ":",
        // tokens starting with +
        "+=", "++", "+",
        // tokens starting with -
        "->*", "->", "--", "-=", "-",
        // tokens starting with *
        "*=", "*",
        // tokens starting with /
        "/=", "/",
        // tokens starting with %
        "%=", "%",
        // tokens starting with ^
        "^=", "^",
        // tokens starting with &
        "&=", "&&", "&",
        // tokens starting with |
        "|=", "||", "|",
        // tokens starting with <
        "<<=", "<<", "<=", "<",
        // tokens starting with >
        ">>=", ">>", ">=", ">",
        // tokens starting with !
        "!=", "!",
        // tokens starting with =
        "==", "=",
        // single tokens
        "~", ";", "?", ",", "{", "}", "[", "]", "(", ")",
    };

    for (const char* spelling : punctuations)
    {
        if (!bump_if(ptr, spelling))
            continue;
        return cpp_token(cpp_token_kind::punctuation, spelling);
    }
    return type_safe::nullopt;
}
}
/// Splits `str` into C++ tokens.
/// \returns The token string built from all tokens in `str`; whitespace between tokens is dropped.
/// \notes The order of the checks matters:
/// numeric literals are tried before punctuation so that `.5` is not split at the `.`,
/// character and string literals before identifiers so that an encoding prefix is not taken as an identifier,
/// and digraphs before punctuation so that they are reported with their primary spelling.
/// Input that starts none of the known tokens violates the precondition and hits `DEBUG_UNREACHABLE`.
cpp_token_string cpp_token_string::tokenize(std::string str)
{
    cpp_token_string::builder builder;

    auto ptr = str.c_str();
    while (*ptr)
    {
        // Skip whitespace first, no token starts with it.
        // Form feed and vertical tab are whitespace as well and must not be reported as unknown input.
        if (*ptr == ' ' || *ptr == '\t' || *ptr == '\n' || *ptr == '\r' || *ptr == '\v'
            || *ptr == '\f')
            ++ptr;
        else if (auto num = numeric_literal_token(ptr))
            builder.add_token(num.value());
        else if (auto char_lit = character_literal(ptr))
            builder.add_token(char_lit.value());
        else if (auto str_lit = string_literal(ptr))
            builder.add_token(str_lit.value());
        else if (auto digraphs = digraph_token(ptr))
            builder.add_token(digraphs.value());
        else if (auto punct = punctuation_token(ptr))
            builder.add_token(punct.value());
        else if (auto id = identifier_token(ptr))
            builder.add_token(id.value());
        else
            DEBUG_UNREACHABLE(detail::assert_handler{});
    }

    return builder.finish();
}
namespace
{
bool is_identifier(char c)

View file

@ -412,9 +412,9 @@ bool detail::skip_attribute(detail::cxtoken_stream& stream)
namespace
{
cpp_token_kind get_kind(CXTokenKind kind)
cpp_token_kind get_kind(const detail::cxtoken& token)
{
switch (kind)
switch (token.kind())
{
case CXToken_Punctuation:
return cpp_token_kind::punctuation;
@ -422,14 +422,26 @@ namespace
return cpp_token_kind::keyword;
case CXToken_Identifier:
return cpp_token_kind::identifier;
case CXToken_Literal:
return cpp_token_kind::literal;
{
auto spelling = token.value().std_str();
if (spelling.find('.') != std::string::npos)
return cpp_token_kind::float_literal;
else if (std::isdigit(spelling.front()))
return cpp_token_kind::int_literal;
else if (spelling.back() == '\'')
return cpp_token_kind::char_literal;
else
return cpp_token_kind::string_literal;
}
case CXToken_Comment:
break;
}
DEBUG_UNREACHABLE(detail::assert_handler{});
return cpp_token_kind ::literal;
return cpp_token_kind::punctuation;
}
}
@ -440,7 +452,7 @@ cpp_token_string detail::to_string(cxtoken_stream& stream, cxtoken_iterator end)
while (stream.cur() != end)
{
auto& token = stream.get();
builder.add_token(cpp_token(get_kind(token.kind()), token.c_str()));
builder.add_token(cpp_token(get_kind(token), token.c_str()));
}
if (stream.unmunch())

View file

@ -246,7 +246,7 @@ namespace
return size_expr.empty() ?
nullptr :
cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_ulonglong),
cpp_token_string::from_string(
cpp_token_string::tokenize(
std::string(size_expr.rbegin(),
size_expr.rend())));
}
@ -488,7 +488,7 @@ namespace
return cpp_decltype_type::build(
cpp_unexposed_expression::build(cpp_unexposed_type::build("<decltype>"),
cpp_token_string::from_string(spelling)));
cpp_token_string::tokenize(spelling)));
});
}

View file

@ -24,6 +24,7 @@ set(tests
cpp_preprocessor.cpp
cpp_static_assert.cpp
cpp_template_parameter.cpp
cpp_token.cpp
cpp_type_alias.cpp
cpp_variable.cpp
integration.cpp

View file

@ -104,7 +104,7 @@ void ns::l()
*cpp_unexposed_expression::
build(cpp_pointer_type::build(
cpp_builtin_type::build(cpp_float)),
cpp_token_string::from_string("nullptr"))));
cpp_token_string::tokenize("nullptr"))));
}
else
REQUIRE(false);
@ -135,7 +135,7 @@ void ns::l()
*cpp_decltype_type::build(
cpp_unexposed_expression::
build(cpp_builtin_type::build(cpp_int),
cpp_token_string::from_string("42")))));
cpp_token_string::tokenize("42")))));
REQUIRE(!param.default_value());
}
else
@ -162,16 +162,17 @@ void ns::l()
equal_expressions(func.noexcept_condition().value(),
*cpp_literal_expression::build(std::move(bool_t), "true")));
else if (func.name() == "e")
REQUIRE(equal_expressions(func.noexcept_condition().value(),
*cpp_unexposed_expression::
build(std::move(bool_t),
cpp_token_string::from_string("false"))));
REQUIRE(
equal_expressions(func.noexcept_condition().value(),
*cpp_unexposed_expression::build(std::move(bool_t),
cpp_token_string::tokenize(
"false"))));
else if (func.name() == "f")
REQUIRE(
equal_expressions(func.noexcept_condition().value(),
*cpp_unexposed_expression::
build(std::move(bool_t),
cpp_token_string::from_string("noexcept(d())"))));
*cpp_unexposed_expression::build(std::move(bool_t),
cpp_token_string::tokenize(
"noexcept(d())"))));
}
else if (func.name() == "g" || func.name() == "h" || func.name() == "i"
|| func.name() == "j")

View file

@ -402,12 +402,11 @@ d::~d() {}
REQUIRE(!dtor.is_virtual());
REQUIRE(dtor.body_kind() == cpp_function_definition);
REQUIRE(dtor.noexcept_condition());
REQUIRE(
equal_expressions(dtor.noexcept_condition().value(),
*cpp_unexposed_expression::build(cpp_builtin_type::build(
cpp_bool),
cpp_token_string::from_string(
"false"))));
REQUIRE(equal_expressions(dtor.noexcept_condition().value(),
*cpp_unexposed_expression::build(cpp_builtin_type::build(
cpp_bool),
cpp_token_string::tokenize(
"false"))));
}
else if (dtor.name() == "~c")
{

View file

@ -39,7 +39,7 @@ struct foo
// all initializers are unexposed
auto def = cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_float),
cpp_token_string::from_string("3.14f"));
cpp_token_string::tokenize("3.14f"));
REQUIRE(var.default_value());
REQUIRE(equal_expressions(var.default_value().value(), *def));

View file

@ -34,17 +34,15 @@ struct foo
REQUIRE(equal_expressions(assert.expression(),
*cpp_literal_expression::build(std::move(bool_t), "true")));
else if (assert.message() == "a")
REQUIRE(
equal_expressions(assert.expression(),
*cpp_unexposed_expression::build(std::move(bool_t),
cpp_token_string::from_string(
"true||false"))));
REQUIRE(equal_expressions(assert.expression(),
*cpp_unexposed_expression::build(std::move(bool_t),
cpp_token_string::tokenize(
"true||false"))));
else if (assert.message() == "b")
REQUIRE(
equal_expressions(assert.expression(),
*cpp_unexposed_expression::build(std::move(bool_t),
cpp_token_string::from_string(
"!B"))));
REQUIRE(equal_expressions(assert.expression(),
*cpp_unexposed_expression::build(std::move(bool_t),
cpp_token_string::tokenize(
"!B"))));
else
REQUIRE(false);
});

View file

@ -151,13 +151,13 @@ using d = void;
cpp_builtin_type::build(cpp_char))));
REQUIRE(!param.is_variadic());
REQUIRE(param.default_value());
REQUIRE(equal_expressions(param.default_value().value(),
*cpp_unexposed_expression::
build(cpp_builtin_type::build(
cpp_nullptr),
cpp_token_string::
from_string(
"nullptr"))));
REQUIRE(
equal_expressions(param.default_value().value(),
*cpp_unexposed_expression::
build(cpp_builtin_type::build(
cpp_nullptr),
cpp_token_string::tokenize(
"nullptr"))));
}
else if (param.name() == "C")
{

129
test/cpp_token.cpp Normal file
View file

@ -0,0 +1,129 @@
// Copyright (C) 2017 Jonathan Müller <jonathanmueller.dev@gmail.com>
// This file is subject to the license terms in the LICENSE file
// found in the top-level directory of this distribution.
#include <cppast/cpp_token.hpp>
#include <catch.hpp>
#include <algorithm>
#include <initializer_list>
using namespace cppast;
// Tokenizes `str` and requires that the result consists of exactly the given `tokens`, in order.
void check_equal_tokens(const std::string& str, std::initializer_list<cpp_token> tokens)
{
    auto token_str = cpp_token_string::tokenize(str);
    INFO(str);

    // The iterator difference is signed while size() is unsigned;
    // convert explicitly instead of comparing values of different signedness.
    const auto expected_count = tokens.size();
    const auto actual_count =
        static_cast<decltype(tokens.size())>(token_str.end() - token_str.begin());
    REQUIRE(actual_count == expected_count);

    // both ranges have the same length at this point, so the three iterator overload is safe
    REQUIRE(std::equal(token_str.begin(), token_str.end(), tokens.begin()));
}
// Exercises cpp_token_string::tokenize() through check_equal_tokens():
// every call tokenizes its first argument and requires the result to equal the listed tokens.
TEST_CASE("tokenizer")
{
SECTION("integer literals")
{
// surrounding whitespace is not part of any token
check_equal_tokens(" 1234 ", {cpp_token(cpp_token_kind::int_literal, "1234")});
check_equal_tokens("1, 2", {cpp_token(cpp_token_kind::int_literal, "1"),
cpp_token(cpp_token_kind::punctuation, ","),
cpp_token(cpp_token_kind::int_literal, "2")});
// integer suffixes
check_equal_tokens("1234ul", {cpp_token(cpp_token_kind::int_literal, "1234ul")});
// the expected spelling does not contain the digit separator
check_equal_tokens("12'34LU", {cpp_token(cpp_token_kind::int_literal, "1234LU")});
// other integer formats
check_equal_tokens("01234", {cpp_token(cpp_token_kind::int_literal, "01234")});
check_equal_tokens("0x1234AF", {cpp_token(cpp_token_kind::int_literal, "0x1234AF")});
check_equal_tokens("0b101101", {cpp_token(cpp_token_kind::int_literal, "0b101101")});
}
SECTION("floating point literals")
{
// floating point suffixes
check_equal_tokens("3.14", {cpp_token(cpp_token_kind::float_literal, "3.14")});
check_equal_tokens("3.14f", {cpp_token(cpp_token_kind::float_literal, "3.14f")});
check_equal_tokens("3.14L", {cpp_token(cpp_token_kind::float_literal, "3.14L")});
// missing parts
check_equal_tokens(".5", {cpp_token(cpp_token_kind::float_literal, ".5")});
check_equal_tokens("1.", {cpp_token(cpp_token_kind::float_literal, "1.")});
// exponents
check_equal_tokens("1.0e4", {cpp_token(cpp_token_kind::float_literal, "1.0e4")});
// an exponent alone is enough for a floating point literal
check_equal_tokens("1e4", {cpp_token(cpp_token_kind::float_literal, "1e4")});
check_equal_tokens(".5e-2", {cpp_token(cpp_token_kind::float_literal, ".5e-2")});
// hexadecimal
check_equal_tokens("0xabc.def", {cpp_token(cpp_token_kind::float_literal, "0xabc.def")});
check_equal_tokens("0x123p42", {cpp_token(cpp_token_kind::float_literal, "0x123p42")});
}
SECTION("character literals")
{
check_equal_tokens(R"('a')", {cpp_token(cpp_token_kind::char_literal, R"('a')")});
// encoding prefixes are part of the spelling
check_equal_tokens(R"(u8'a')", {cpp_token(cpp_token_kind::char_literal, R"(u8'a')")});
check_equal_tokens(R"(U'a')", {cpp_token(cpp_token_kind::char_literal, R"(U'a')")});
// an escaped quote does not end the literal
check_equal_tokens(R"('\'')", {cpp_token(cpp_token_kind::char_literal, R"('\'')")});
}
SECTION("string literals")
{
check_equal_tokens(R"("hello")", {cpp_token(cpp_token_kind::string_literal, R"("hello")")});
// an escaped quote does not end the literal
check_equal_tokens(R"(u8"he\"llo")",
{cpp_token(cpp_token_kind::string_literal, R"(u8"he\"llo")")});
// raw string literals, without and with a delimiter
check_equal_tokens(R"*(R"(hel\"lo)")*",
{cpp_token(cpp_token_kind::string_literal, R"*(R"(hel\"lo)")*")});
check_equal_tokens(R"**(R"*(hello R"(foo)")*")**",
{cpp_token(cpp_token_kind::string_literal,
R"**(R"*(hello R"(foo)")*")**")});
}
SECTION("UDLs")
{
// the user-defined suffix is part of the literal's spelling and does not change its kind
check_equal_tokens("123_foo", {cpp_token(cpp_token_kind::int_literal, "123_foo")});
check_equal_tokens("123.456_foo",
{cpp_token(cpp_token_kind::float_literal, "123.456_foo")});
check_equal_tokens(R"("hi"_foo)",
{cpp_token(cpp_token_kind::string_literal, R"("hi"_foo)")});
}
SECTION("identifiers")
{
check_equal_tokens("foo bar baz_a", {cpp_token(cpp_token_kind::identifier, "foo"),
cpp_token(cpp_token_kind::identifier, "bar"),
cpp_token(cpp_token_kind::identifier, "baz_a")});
// an identifier that merely starts with a keyword stays an identifier
check_equal_tokens("constant", {cpp_token(cpp_token_kind::identifier, "constant")});
}
SECTION("keywords")
{
// just test some
check_equal_tokens("const float auto", {cpp_token(cpp_token_kind::keyword, "const"),
cpp_token(cpp_token_kind::keyword, "float"),
cpp_token(cpp_token_kind::keyword, "auto")});
}
SECTION("punctuations")
{
// just test munch things
check_equal_tokens("<< <= <", {cpp_token(cpp_token_kind::punctuation, "<<"),
cpp_token(cpp_token_kind::punctuation, "<="),
cpp_token(cpp_token_kind::punctuation, "<")});
check_equal_tokens("- -- -> ->*", {cpp_token(cpp_token_kind::punctuation, "-"),
cpp_token(cpp_token_kind::punctuation, "--"),
cpp_token(cpp_token_kind::punctuation, "->"),
cpp_token(cpp_token_kind::punctuation, "->*")});
// without separating whitespace the longest punctuation is taken each time
check_equal_tokens("--->>>>", {cpp_token(cpp_token_kind::punctuation, "--"),
cpp_token(cpp_token_kind::punctuation, "->"),
cpp_token(cpp_token_kind::punctuation, ">>"),
cpp_token(cpp_token_kind::punctuation, ">")});
// alternative spellings
// they are expected as punctuation with the primary spelling
check_equal_tokens("and not xor", {cpp_token(cpp_token_kind::punctuation, "&&"),
cpp_token(cpp_token_kind::punctuation, "!"),
cpp_token(cpp_token_kind::punctuation, "^")});
// digraphs
// `<%` is expected as `{`, but the `<:` in `foo<::bar>` is expected as `<` followed by `::`
check_equal_tokens("<% foo<::bar>", {cpp_token(cpp_token_kind::punctuation, "{"),
cpp_token(cpp_token_kind::identifier, "foo"),
cpp_token(cpp_token_kind::punctuation, "<"),
cpp_token(cpp_token_kind::punctuation, "::"),
cpp_token(cpp_token_kind::identifier, "bar"),
cpp_token(cpp_token_kind::punctuation, ">")});
}
}

View file

@ -334,7 +334,7 @@ typedef decltype(0) w;
return cpp_literal_expression::build(std::move(type), std::move(size));
else
return cpp_unexposed_expression::build(std::move(type),
cpp_token_string::from_string(std::move(size)));
cpp_token_string::tokenize(std::move(size)));
};
cpp_entity_index idx;
@ -507,7 +507,7 @@ typedef decltype(0) w;
{
auto type = cpp_decltype_type::build(
cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_int),
cpp_token_string::from_string("0")));
cpp_token_string::tokenize("0")));
REQUIRE(equal_types(idx, alias.underlying_type(), *type));
}
else

View file

@ -101,14 +101,13 @@ int r[] = {0};
// unexposed due to implicit cast, I think
type_safe::ref(
*cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_int),
cpp_token_string::from_string(
"42"))),
cpp_token_string::tokenize("42"))),
cpp_storage_class_none, false, false);
else if (var.name() == "c")
check_variable(var, *cpp_builtin_type::build(cpp_float),
type_safe::ref(
*cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_float),
cpp_token_string::from_string(
cpp_token_string::tokenize(
"3.f+0.14f"))),
cpp_storage_class_none, false, false);
else if (var.name() == "d")
@ -126,8 +125,7 @@ int r[] = {0};
cpp_cv_const),
type_safe::ref(
*cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_int),
cpp_token_string::from_string(
"12"))),
cpp_token_string::tokenize("12"))),
cpp_storage_class_none, true, false);
else if (var.name() == "i")
{
@ -147,7 +145,7 @@ int r[] = {0};
*cpp_unexposed_expression::build(cpp_user_defined_type::build(
cpp_type_ref(cpp_entity_id(""),
"bar")),
cpp_token_string::from_string(
cpp_token_string::tokenize(
"bar()"))),
cpp_storage_class_none, false, false);
return false;
@ -169,8 +167,7 @@ int r[] = {0};
check_variable(var, *cpp_auto_type::build(),
type_safe::ref(
*cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_int),
cpp_token_string::from_string(
"128"))),
cpp_token_string::tokenize("128"))),
cpp_storage_class_none, false, false);
else if (var.name() == "n")
check_variable(var,
@ -180,14 +177,13 @@ int r[] = {0};
cpp_ref_lvalue),
type_safe::ref(
*cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_int),
cpp_token_string::from_string(
"m"))),
cpp_token_string::tokenize("m"))),
cpp_storage_class_none, false, false);
else if (var.name() == "o")
check_variable(var,
*cpp_decltype_type::build(
cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_int),
cpp_token_string::from_string("0"))),
cpp_token_string::tokenize("0"))),
nullptr, cpp_storage_class_none, false, false);
else if (var.name() == "p")
check_variable(var,
@ -196,13 +192,12 @@ int r[] = {0};
build(cpp_decltype_type::build(
cpp_unexposed_expression::
build(cpp_builtin_type::build(cpp_int),
cpp_token_string::from_string("o"))),
cpp_token_string::tokenize("o"))),
cpp_cv_const),
cpp_ref_lvalue),
type_safe::ref(
*cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_int),
cpp_token_string::from_string(
"o"))),
cpp_token_string::tokenize("o"))),
cpp_storage_class_none, false, false);
else if (var.name() == "q")
check_variable(var,
@ -219,8 +214,7 @@ int r[] = {0};
"1")),
type_safe::ref(
*cpp_unexposed_expression::build(cpp_unexposed_type::build(""),
cpp_token_string::from_string(
"{0}"))),
cpp_token_string::tokenize("{0}"))),
cpp_storage_class_none, false, false);
else
REQUIRE(false);