Add function to tokenize strings

This commit is contained in:
Jonathan Müller 2017-10-19 19:02:27 +02:00
commit 1572920650
14 changed files with 798 additions and 83 deletions

View file

@ -24,6 +24,7 @@ set(tests
cpp_preprocessor.cpp
cpp_static_assert.cpp
cpp_template_parameter.cpp
cpp_token.cpp
cpp_type_alias.cpp
cpp_variable.cpp
integration.cpp

View file

@ -104,7 +104,7 @@ void ns::l()
*cpp_unexposed_expression::
build(cpp_pointer_type::build(
cpp_builtin_type::build(cpp_float)),
cpp_token_string::from_string("nullptr"))));
cpp_token_string::tokenize("nullptr"))));
}
else
REQUIRE(false);
@ -135,7 +135,7 @@ void ns::l()
*cpp_decltype_type::build(
cpp_unexposed_expression::
build(cpp_builtin_type::build(cpp_int),
cpp_token_string::from_string("42")))));
cpp_token_string::tokenize("42")))));
REQUIRE(!param.default_value());
}
else
@ -162,16 +162,17 @@ void ns::l()
equal_expressions(func.noexcept_condition().value(),
*cpp_literal_expression::build(std::move(bool_t), "true")));
else if (func.name() == "e")
REQUIRE(equal_expressions(func.noexcept_condition().value(),
*cpp_unexposed_expression::
build(std::move(bool_t),
cpp_token_string::from_string("false"))));
REQUIRE(
equal_expressions(func.noexcept_condition().value(),
*cpp_unexposed_expression::build(std::move(bool_t),
cpp_token_string::tokenize(
"false"))));
else if (func.name() == "f")
REQUIRE(
equal_expressions(func.noexcept_condition().value(),
*cpp_unexposed_expression::
build(std::move(bool_t),
cpp_token_string::from_string("noexcept(d())"))));
*cpp_unexposed_expression::build(std::move(bool_t),
cpp_token_string::tokenize(
"noexcept(d())"))));
}
else if (func.name() == "g" || func.name() == "h" || func.name() == "i"
|| func.name() == "j")

View file

@ -402,12 +402,11 @@ d::~d() {}
REQUIRE(!dtor.is_virtual());
REQUIRE(dtor.body_kind() == cpp_function_definition);
REQUIRE(dtor.noexcept_condition());
REQUIRE(
equal_expressions(dtor.noexcept_condition().value(),
*cpp_unexposed_expression::build(cpp_builtin_type::build(
cpp_bool),
cpp_token_string::from_string(
"false"))));
REQUIRE(equal_expressions(dtor.noexcept_condition().value(),
*cpp_unexposed_expression::build(cpp_builtin_type::build(
cpp_bool),
cpp_token_string::tokenize(
"false"))));
}
else if (dtor.name() == "~c")
{

View file

@ -39,7 +39,7 @@ struct foo
// all initializers are unexposed
auto def = cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_float),
cpp_token_string::from_string("3.14f"));
cpp_token_string::tokenize("3.14f"));
REQUIRE(var.default_value());
REQUIRE(equal_expressions(var.default_value().value(), *def));

View file

@ -34,17 +34,15 @@ struct foo
REQUIRE(equal_expressions(assert.expression(),
*cpp_literal_expression::build(std::move(bool_t), "true")));
else if (assert.message() == "a")
REQUIRE(
equal_expressions(assert.expression(),
*cpp_unexposed_expression::build(std::move(bool_t),
cpp_token_string::from_string(
"true||false"))));
REQUIRE(equal_expressions(assert.expression(),
*cpp_unexposed_expression::build(std::move(bool_t),
cpp_token_string::tokenize(
"true||false"))));
else if (assert.message() == "b")
REQUIRE(
equal_expressions(assert.expression(),
*cpp_unexposed_expression::build(std::move(bool_t),
cpp_token_string::from_string(
"!B"))));
REQUIRE(equal_expressions(assert.expression(),
*cpp_unexposed_expression::build(std::move(bool_t),
cpp_token_string::tokenize(
"!B"))));
else
REQUIRE(false);
});

View file

@ -151,13 +151,13 @@ using d = void;
cpp_builtin_type::build(cpp_char))));
REQUIRE(!param.is_variadic());
REQUIRE(param.default_value());
REQUIRE(equal_expressions(param.default_value().value(),
*cpp_unexposed_expression::
build(cpp_builtin_type::build(
cpp_nullptr),
cpp_token_string::
from_string(
"nullptr"))));
REQUIRE(
equal_expressions(param.default_value().value(),
*cpp_unexposed_expression::
build(cpp_builtin_type::build(
cpp_nullptr),
cpp_token_string::tokenize(
"nullptr"))));
}
else if (param.name() == "C")
{

129
test/cpp_token.cpp Normal file
View file

@ -0,0 +1,129 @@
// Copyright (C) 2017 Jonathan Müller <jonathanmueller.dev@gmail.com>
// This file is subject to the license terms in the LICENSE file
// found in the top-level directory of this distribution.
#include <cppast/cpp_token.hpp>
#include <catch.hpp>
#include <algorithm>
#include <initializer_list>
using namespace cppast;
void check_equal_tokens(const std::string& str, std::initializer_list<cpp_token> tokens)
{
auto token_str = cpp_token_string::tokenize(str);
INFO(str);
REQUIRE(token_str.end() - token_str.begin() == tokens.size());
REQUIRE(std::equal(token_str.begin(), token_str.end(), tokens.begin()));
}
// Runs cpp_token_string::tokenize() over one input per token category and compares
// the result against a hand-written expected token sequence via check_equal_tokens().
TEST_CASE("tokenizer")
{
    using kind = cpp_token_kind;

    // Shorthand for inputs that are expected to produce exactly one token.
    const auto check_single = [](const char* input, kind token_kind, const char* spelling) {
        check_equal_tokens(input, {cpp_token(token_kind, spelling)});
    };

    SECTION("integer literals")
    {
        // surrounding whitespace is not part of the expected token
        check_single(" 1234 ", kind::int_literal, "1234");
        check_equal_tokens("1, 2", {cpp_token(kind::int_literal, "1"),
                                    cpp_token(kind::punctuation, ","),
                                    cpp_token(kind::int_literal, "2")});

        // suffixes stay attached to the literal
        check_single("1234ul", kind::int_literal, "1234ul");
        // the expected spelling omits the digit separator
        check_single("12'34LU", kind::int_literal, "1234LU");

        // octal, hexadecimal and binary forms
        check_single("01234", kind::int_literal, "01234");
        check_single("0x1234AF", kind::int_literal, "0x1234AF");
        check_single("0b101101", kind::int_literal, "0b101101");
    }

    SECTION("floating point literals")
    {
        // no suffix, `f` suffix, `L` suffix
        check_single("3.14", kind::float_literal, "3.14");
        check_single("3.14f", kind::float_literal, "3.14f");
        check_single("3.14L", kind::float_literal, "3.14L");

        // integer or fractional part left out
        check_single(".5", kind::float_literal, ".5");
        check_single("1.", kind::float_literal, "1.");

        // with exponent
        check_single("1.0e4", kind::float_literal, "1.0e4");
        check_single("1e4", kind::float_literal, "1e4");
        check_single(".5e-2", kind::float_literal, ".5e-2");

        // hexadecimal floating point
        check_single("0xabc.def", kind::float_literal, "0xabc.def");
        check_single("0x123p42", kind::float_literal, "0x123p42");
    }

    SECTION("character literals")
    {
        // plain, prefixed, and containing an escaped quote
        check_single(R"('a')", kind::char_literal, R"('a')");
        check_single(R"(u8'a')", kind::char_literal, R"(u8'a')");
        check_single(R"(U'a')", kind::char_literal, R"(U'a')");
        check_single(R"('\'')", kind::char_literal, R"('\'')");
    }

    SECTION("string literals")
    {
        check_single(R"("hello")", kind::string_literal, R"("hello")");
        // escaped quote inside a prefixed string
        check_single(R"(u8"he\"llo")", kind::string_literal, R"(u8"he\"llo")");
        // raw strings, without and with a custom delimiter
        check_single(R"*(R"(hel\"lo)")*", kind::string_literal, R"*(R"(hel\"lo)")*");
        check_single(R"**(R"*(hello R"(foo)")*")**", kind::string_literal,
                     R"**(R"*(hello R"(foo)")*")**");
    }

    SECTION("UDLs")
    {
        // a user-defined suffix is expected to stay part of the literal token
        check_single("123_foo", kind::int_literal, "123_foo");
        check_single("123.456_foo", kind::float_literal, "123.456_foo");
        check_single(R"("hi"_foo)", kind::string_literal, R"("hi"_foo)");
    }

    SECTION("identifiers")
    {
        check_equal_tokens("foo bar baz_a", {cpp_token(kind::identifier, "foo"),
                                             cpp_token(kind::identifier, "bar"),
                                             cpp_token(kind::identifier, "baz_a")});
        // starts with a keyword ("const") but must still be a single identifier
        check_single("constant", kind::identifier, "constant");
    }

    SECTION("keywords")
    {
        // only a sample of the keywords is checked
        check_equal_tokens("const float auto", {cpp_token(kind::keyword, "const"),
                                                cpp_token(kind::keyword, "float"),
                                                cpp_token(kind::keyword, "auto")});
    }

    SECTION("punctuations")
    {
        // only checks that the longest matching punctuator is chosen
        check_equal_tokens("<< <= <", {cpp_token(kind::punctuation, "<<"),
                                       cpp_token(kind::punctuation, "<="),
                                       cpp_token(kind::punctuation, "<")});
        check_equal_tokens("- -- -> ->*", {cpp_token(kind::punctuation, "-"),
                                           cpp_token(kind::punctuation, "--"),
                                           cpp_token(kind::punctuation, "->"),
                                           cpp_token(kind::punctuation, "->*")});
        check_equal_tokens("--->>>>", {cpp_token(kind::punctuation, "--"),
                                       cpp_token(kind::punctuation, "->"),
                                       cpp_token(kind::punctuation, ">>"),
                                       cpp_token(kind::punctuation, ">")});

        // alternative spellings are expected as their primary spelling
        check_equal_tokens("and not xor", {cpp_token(kind::punctuation, "&&"),
                                           cpp_token(kind::punctuation, "!"),
                                           cpp_token(kind::punctuation, "^")});

        // digraphs: "<%" is expected as "{", while "<::" is expected as "<" then "::"
        check_equal_tokens("<% foo<::bar>", {cpp_token(kind::punctuation, "{"),
                                             cpp_token(kind::identifier, "foo"),
                                             cpp_token(kind::punctuation, "<"),
                                             cpp_token(kind::punctuation, "::"),
                                             cpp_token(kind::identifier, "bar"),
                                             cpp_token(kind::punctuation, ">")});
    }
}

View file

@ -334,7 +334,7 @@ typedef decltype(0) w;
return cpp_literal_expression::build(std::move(type), std::move(size));
else
return cpp_unexposed_expression::build(std::move(type),
cpp_token_string::from_string(std::move(size)));
cpp_token_string::tokenize(std::move(size)));
};
cpp_entity_index idx;
@ -507,7 +507,7 @@ typedef decltype(0) w;
{
auto type = cpp_decltype_type::build(
cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_int),
cpp_token_string::from_string("0")));
cpp_token_string::tokenize("0")));
REQUIRE(equal_types(idx, alias.underlying_type(), *type));
}
else

View file

@ -101,14 +101,13 @@ int r[] = {0};
// unexposed due to implicit cast, I think
type_safe::ref(
*cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_int),
cpp_token_string::from_string(
"42"))),
cpp_token_string::tokenize("42"))),
cpp_storage_class_none, false, false);
else if (var.name() == "c")
check_variable(var, *cpp_builtin_type::build(cpp_float),
type_safe::ref(
*cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_float),
cpp_token_string::from_string(
cpp_token_string::tokenize(
"3.f+0.14f"))),
cpp_storage_class_none, false, false);
else if (var.name() == "d")
@ -126,8 +125,7 @@ int r[] = {0};
cpp_cv_const),
type_safe::ref(
*cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_int),
cpp_token_string::from_string(
"12"))),
cpp_token_string::tokenize("12"))),
cpp_storage_class_none, true, false);
else if (var.name() == "i")
{
@ -147,7 +145,7 @@ int r[] = {0};
*cpp_unexposed_expression::build(cpp_user_defined_type::build(
cpp_type_ref(cpp_entity_id(""),
"bar")),
cpp_token_string::from_string(
cpp_token_string::tokenize(
"bar()"))),
cpp_storage_class_none, false, false);
return false;
@ -169,8 +167,7 @@ int r[] = {0};
check_variable(var, *cpp_auto_type::build(),
type_safe::ref(
*cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_int),
cpp_token_string::from_string(
"128"))),
cpp_token_string::tokenize("128"))),
cpp_storage_class_none, false, false);
else if (var.name() == "n")
check_variable(var,
@ -180,14 +177,13 @@ int r[] = {0};
cpp_ref_lvalue),
type_safe::ref(
*cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_int),
cpp_token_string::from_string(
"m"))),
cpp_token_string::tokenize("m"))),
cpp_storage_class_none, false, false);
else if (var.name() == "o")
check_variable(var,
*cpp_decltype_type::build(
cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_int),
cpp_token_string::from_string("0"))),
cpp_token_string::tokenize("0"))),
nullptr, cpp_storage_class_none, false, false);
else if (var.name() == "p")
check_variable(var,
@ -196,13 +192,12 @@ int r[] = {0};
build(cpp_decltype_type::build(
cpp_unexposed_expression::
build(cpp_builtin_type::build(cpp_int),
cpp_token_string::from_string("o"))),
cpp_token_string::tokenize("o"))),
cpp_cv_const),
cpp_ref_lvalue),
type_safe::ref(
*cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_int),
cpp_token_string::from_string(
"o"))),
cpp_token_string::tokenize("o"))),
cpp_storage_class_none, false, false);
else if (var.name() == "q")
check_variable(var,
@ -219,8 +214,7 @@ int r[] = {0};
"1")),
type_safe::ref(
*cpp_unexposed_expression::build(cpp_unexposed_type::build(""),
cpp_token_string::from_string(
"{0}"))),
cpp_token_string::tokenize("{0}"))),
cpp_storage_class_none, false, false);
else
REQUIRE(false);