diff --git a/Source/DoxygenTranslator/src/DoxygenParser.cpp b/Source/DoxygenTranslator/src/DoxygenParser.cpp
index 59b47e46f..846da261b 100644
--- a/Source/DoxygenTranslator/src/DoxygenParser.cpp
+++ b/Source/DoxygenTranslator/src/DoxygenParser.cpp
@@ -960,98 +960,10 @@ DoxygenEntityList DoxygenParser::createTree(const std::string &doxygenBlob,
}
-/**
- * This is one of the most important methods - it breaks the original
- * doxygen comment into tokens - one token per word.
- * See replacement, which also handles html comments below.
-DoxygenParser::TokenList DoxygenParser::tokenizeDoxygenComment(const std::string &doxygenComment,
- const std::string &fileName,
- int fileLine) {
- TokenList tokList;
- m_fileLineNo = fileLine;
- m_fileName = fileName;
-
- bool isPlainString = false;
- string::size_type pos, lastPos = 0;
- char prevChar = doxygenComment[lastPos];
- string currentWord;
- while (true) {
- isPlainString = false;
- pos = doxygenComment.find_first_of("\\@\t\n ", lastPos);
- if (pos == string::npos) {
- pos = doxygenComment.size();
- // } else {
- // preserve whitespaces
- // while (pos != string::npos && (doxygenComment[pos] == ' ' || doxygenComment[pos] == '\t')) {
- // pos++;
- // }
- // if (pos == string::npos) {
- // pos = doxygenComment.size();
- // }
- }
-
- currentWord = doxygenComment.substr(lastPos, pos-lastPos);
-
- if (prevChar == '\n') {
-
- tokList.push_back(Token(END_LINE, "\n"));
-
- } else if (prevChar == '\\' || prevChar == '@') {
- // it's a doxygen command
- // hack to get commands like \\ or \@ or @\ or @@
- if (doxygenComment[pos] == '@' || doxygenComment[pos] == '\\') {
- currentWord += doxygenComment[pos];
- pos++;
- }
- // also strip the command till the first non-alpha char
- for (size_t i = 2; i < currentWord.size(); i++) {
- if (!isalpha(currentWord[i])) {
- currentWord = currentWord.substr(0, i);
- // set current parsing pos back, to parse the rest of the command
- pos = lastPos + i - 1;
- break;
- }
- }
-
- if (findCommand(currentWord)) {
- tokList.push_back(Token(COMMAND, currentWord));
- } else {
- // unknown commands are not translated - treated as literal string
- tokList.push_back(Token(PLAINSTRING, currentWord));
- }
-
- } else if (currentWord.size() && (currentWord[0] == '!' || currentWord[0] == '*' || currentWord[0] == '/')) {
-
- // check if it's one of the '!!!', '***', '///' of any length
- char c = currentWord[0];
- isPlainString = false;
- for (size_t i = 0; i < currentWord.size(); i++)
- if (currentWord[i] != c) {
- isPlainString = true;
- break;
- }
- } else {
- isPlainString = true;
- }
-
- if (isPlainString && currentWord.size()) {
- tokList.push_back(Token(PLAINSTRING, currentWord));
- }
-
- prevChar = doxygenComment[pos];
- lastPos = pos + 1;
- if (lastPos >= doxygenComment.size())
- break;
- }
-
- m_tokenListIt = tokList.begin();
-
- return tokList;
-}
+/*
+ * Splits 'text' on 'separator' chars. Separator chars are not part of the
+ * strings.
*/
-
-
-// Splits 'text' on 'separator' chars. Separator chars are not part of the strings.
DoxygenParser::StringVector DoxygenParser::split(const std::string &text, char separator)
{
StringVector lines;
@@ -1067,12 +979,21 @@ DoxygenParser::StringVector DoxygenParser::split(const std::string &text, char s
}
+/*
+ * Returns true, if 'c' is one of doxygen comment block start
+ * characters: *, /, or !
+ */
bool DoxygenParser::isStartOfDoxyCommentChar(char c)
{
return (strchr("*/!", c) != NULL);
}
+/*
+ * Adds token with Doxygen command to token list, but only if command is one of
+ * Doxygen commands. In that case true is returned. If the command is not
+ * recognized as a doxygen command, it is ignored and false is returned.
+ */
bool DoxygenParser::addDoxyCommand(DoxygenParser::TokenList &tokList,
const std::string &cmd) {
if (findCommand(cmd)) {
@@ -1090,11 +1011,16 @@ bool DoxygenParser::addDoxyCommand(DoxygenParser::TokenList &tokList,
}
+/*
+ * This method copies comment text to output as it is - no processing is
+ * done, Doxygen commands are ignored. It is used for commands \verbatim,
+ * \htmlonly, \f$, \f[, and \f{.
+ */
size_t DoxygenParser::processVerbatimText(size_t pos, const std::string &line)
{
- if (line[pos] == '\\' || line[pos] == '@') {
+ if (line[pos] == '\\' || line[pos] == '@') { // check for end commands
pos++;
- // characters '$[]{}' are used in commands \f$, \f[, ...
+ // characters '$[]{}' are used in commands \f$, \f[, and \f{
size_t endOfWordPos = line.find_first_not_of("abcdefghijklmnopqrstuvwxyz$[]{}", pos);
string cmd = line.substr(pos, endOfWordPos - pos);
@@ -1120,155 +1046,194 @@ size_t DoxygenParser::processVerbatimText(size_t pos, const std::string &line)
}
-size_t DoxygenParser::processNormalComment(size_t pos, const std::string &line)
+/*
+ * Processes doxy commands for escaped characters: \$ \@ \\ \& \~ \< \> \# \% \" \. \::
+ * Handling this separately supports documentation text like \@someText.
+ */
+bool DoxygenParser::processEscapedChars(size_t &pos, const std::string &line)
{
- switch (line[pos]) {
- case '\\':
- case '@': {
- // process doxy commands for escaped characters - handling this separately
- // supports documentation text like \@someText
- if ((pos + 1) < line.size()) {
+ if ((pos + 1) < line.size()) {
- // \ and @ with trailing whitespace or quoted get to output as plain string
- string whitespaces = " '\t\n";
- if (whitespaces.find(line[pos + 1]) != string::npos) {
- m_tokenList.push_back(Token(PLAINSTRING, line.substr(pos, 1)));
- pos++;
- break;
- }
-
- // these chars can be escaped for doxygen
- string escapedChars = "$@\\&~<>#%\".";
- if (escapedChars.find(line[pos + 1]) != string::npos) {
- addDoxyCommand(m_tokenList, line.substr(pos + 1, 1));
- pos += 2;
- break;
- } else if ((pos + 2) < line.size() &&
- line[pos + 1] == ':' && line[pos + 2] == ':') {
- // add command \:: - handling this separately supports documentation
- // text like \::someText
- addDoxyCommand(m_tokenList, line.substr(pos + 1, 2));
- pos += 3;
- break;
- }
- }
- // handle word commands and \f[, \f$, ... commands
+ // \ and @ with trailing whitespace or quoted get to output as plain string
+ string whitespaces = " '\t\n";
+ if (whitespaces.find(line[pos + 1]) != string::npos) {
+ m_tokenList.push_back(Token(PLAINSTRING, line.substr(pos, 1)));
pos++;
- // characters '$[]{}' are used in commands \f$, \f[, ...
- size_t endOfWordPos = line.find_first_not_of("abcdefghijklmnopqrstuvwxyz$[]{}", pos);
- string cmd = line.substr(pos , endOfWordPos - pos);
- addDoxyCommand(m_tokenList, cmd);
- if (cmd == CMD_HTML_ONLY || cmd == CMD_VERBATIM ||
- cmd == CMD_LATEX_1 || cmd == CMD_LATEX_2 || cmd == CMD_LATEX_3) {
- m_isVerbatimText = true;
- } else {
- // skip any possible spaces after command, because some commands have parameters,
- // and spaces between command and parameter must be ignored.
- if (endOfWordPos != string::npos) {
- endOfWordPos = line.find_first_not_of(" \t", endOfWordPos);
- }
- }
- pos = endOfWordPos;
- } break;
-
- case ' ': // whitespace
- case '\t': {
- // whitespaces are stored as plain strings
- size_t startOfNextWordPos = line.find_first_not_of(" \t", pos + 1);
- m_tokenList.push_back(Token(PLAINSTRING,
- line.substr(pos, startOfNextWordPos - pos)));
- pos = startOfNextWordPos;
- } break;
-
- case '<': { // process html commands
- bool isEndHtmlTag = false;
- pos++;
- if (line.size() > pos && line[pos] == '/') {
- isEndHtmlTag = true;
- pos++;
- }
-
- size_t endHtmlPos = line.find_first_of("\t >", pos);
-
- // prepend '<' to distinguish HTML tags from doxygen commands
- string cmd = line.substr(pos, endHtmlPos - pos);
- pos = endHtmlPos;
-
- if (addDoxyCommand(m_tokenList, '<' + cmd)) {
- // it is a valid HTML command
- if (line[pos] != '>') { // it should be HTML tag with args,
- // for example ,
, ...
- if (isEndHtmlTag) {
- m_tokenListIt = m_tokenList.end();
- printListError(WARN_DOXYGEN_COMMAND_ERROR, "Illegal end HTML tag without '>' found! Tag: " + cmd);
- }
- endHtmlPos = line.find(">", pos);
- if (endHtmlPos == string::npos) {
- m_tokenListIt = m_tokenList.end();
- printListError(WARN_DOXYGEN_COMMAND_ERROR, "HTML tag without '>' found! Tag: " + cmd);
- }
- // add args of HTML command, like link URL, image URL, ...
- m_tokenList.push_back(Token(PLAINSTRING,
- line.substr(pos, endHtmlPos - pos)));
- pos = endHtmlPos;
- } else {
- if (isEndHtmlTag) {
- // it is a simple tag, so push empty string
- m_tokenList.push_back(Token(PLAINSTRING, END_HTML_TAG_MARK));
- } else {
- // it is a simple tag, so push empty string
- m_tokenList.push_back(Token(PLAINSTRING, ""));
- }
- }
- pos++; // skip '>'
- } else {
- // the command is not HTML supported by Doxygen, < and > will be
- // replaced by HTML entities < and > respectively,
- // but only if 'htmlOnly' flag == false. The flag is set/reset by \htmlonly \verbatim,
- // \endhtmlonly \endverbatim Doxygen commands.
- addDoxyCommand(m_tokenList, "<");
- m_tokenList.push_back(Token(PLAINSTRING, cmd));
- }
- } break;
- case '>': // this char is detected here only when it is not part of HTML tag
- addDoxyCommand(m_tokenList, ">");
- pos++;
- break;
- case '&': { // process HTML entities
- size_t endOfWordPos = line.find_first_not_of("abcdefghijklmnopqrstuvwxyz", pos + 1);
- if (endOfWordPos != string::npos) {
- if (line[endOfWordPos] == ';') {
- // if entity is not recognized by Doxygen (not in the list of
- // commands) nothing is added (here and in Doxygen).
- addDoxyCommand(m_tokenList, line.substr(pos, endOfWordPos - pos));
- endOfWordPos++; // skip ';'
- } else {
- // it is not an entity - add entity for ampersand and
- // the rest of string
- addDoxyCommand(m_tokenList, "&");
- m_tokenList.push_back(Token(PLAINSTRING,
- line.substr(pos + 1, endOfWordPos - pos - 1)));
- }
- }
- pos = endOfWordPos;
- }
- break;
- case '"':
- m_isInQuotedString = true;
- m_tokenList.push_back(Token(PLAINSTRING, "\""));
- pos++;
- break;
- default:
- m_tokenListIt = m_tokenList.end();
- printListError(WARN_DOXYGEN_COMMAND_ERROR, "Unknown special character: " + line[pos]);
+ return true;
}
- return pos;
+ // these chars can be escaped for doxygen
+ string escapedChars = "$@\\&~<>#%\".";
+ if (escapedChars.find(line[pos + 1]) != string::npos) {
+ addDoxyCommand(m_tokenList, line.substr(pos + 1, 1));
+ pos += 2;
+ return true;
+ } else if ((pos + 2) < line.size() &&
+ line[pos + 1] == ':' && line[pos + 2] == ':') {
+ // add command \:: - handling this separately supports documentation
+ // text like \::someText
+ addDoxyCommand(m_tokenList, line.substr(pos + 1, 2));
+ pos += 3;
+ return true;
+ }
+ }
+ return false;
}
-/**
- * This method tokenizes Doxygen comment to words and doxygen commands.
+/*
+ * Processes word doxygen commands, like \arg, \c, \b, \return, ...
+ */
+void DoxygenParser::processWordCommands(size_t &pos, const std::string &line)
+{
+ pos++;
+ // characters '$[]{}' are used in commands \f$, \f[, ...
+ size_t endOfWordPos = line.find_first_not_of("abcdefghijklmnopqrstuvwxyz$[]{}", pos);
+ string cmd = line.substr(pos , endOfWordPos - pos);
+ addDoxyCommand(m_tokenList, cmd);
+ if (cmd == CMD_HTML_ONLY || cmd == CMD_VERBATIM ||
+ cmd == CMD_LATEX_1 || cmd == CMD_LATEX_2 || cmd == CMD_LATEX_3) {
+ m_isVerbatimText = true;
+ } else {
+ // skip any possible spaces after command, because some commands have parameters,
+ // and spaces between command and parameter must be ignored.
+ if (endOfWordPos != string::npos) {
+ endOfWordPos = line.find_first_not_of(" \t", endOfWordPos);
+ }
+ }
+ pos = endOfWordPos;
+}
+
+
+void DoxygenParser::processHtmlTags(size_t &pos, const std::string &line)
+{
+ bool isEndHtmlTag = false;
+ pos++;
+ if (line.size() > pos && line[pos] == '/') {
+ isEndHtmlTag = true;
+ pos++;
+ }
+
+ size_t endHtmlPos = line.find_first_of("\t >", pos);
+
+ string cmd = line.substr(pos, endHtmlPos - pos);
+ pos = endHtmlPos;
+
+ // prepend '<' to distinguish HTML tags from doxygen commands
+ if (addDoxyCommand(m_tokenList, '<' + cmd)) {
+ // it is a valid HTML command
+ if (line[pos] != '>') { // it should be HTML tag with args,
+ // for example ,
, ...
+ if (isEndHtmlTag) {
+ m_tokenListIt = m_tokenList.end();
+ printListError(WARN_DOXYGEN_COMMAND_ERROR,
+ "Illegal end HTML tag without '>' found! Tag: " + cmd);
+ }
+
+ endHtmlPos = line.find(">", pos);
+ if (endHtmlPos == string::npos) {
+ m_tokenListIt = m_tokenList.end();
+ printListError(WARN_DOXYGEN_COMMAND_ERROR,
+ "HTML tag without '>' found! Tag: " + cmd);
+ }
+
+ // add args of HTML command, like link URL, image URL, ...
+ m_tokenList.push_back(Token(PLAINSTRING, line.substr(pos, endHtmlPos - pos)));
+ pos = endHtmlPos;
+ } else {
+ if (isEndHtmlTag) {
+ m_tokenList.push_back(Token(PLAINSTRING, END_HTML_TAG_MARK));
+ } else {
+ // it is a simple tag, so push empty string
+ m_tokenList.push_back(Token(PLAINSTRING, ""));
+ }
+ }
+ pos++; // skip '>'
+ } else {
+ // the command is not HTML supported by Doxygen, < and > will be
+ // replaced by HTML entities < and > respectively,
+ addDoxyCommand(m_tokenList, "<");
+ m_tokenList.push_back(Token(PLAINSTRING, cmd));
+ }
+}
+
+
+void DoxygenParser::processHtmlEntities(size_t &pos, const std::string &line)
+{
+ size_t endOfWordPos = line.find_first_not_of("abcdefghijklmnopqrstuvwxyz", pos + 1);
+
+ if (endOfWordPos != string::npos) {
+
+ if (line[endOfWordPos] == ';')
+ {
+ // if entity is not recognized by Doxygen (not in the list of
+ // commands) nothing is added (here and in Doxygen).
+ addDoxyCommand(m_tokenList, line.substr(pos, endOfWordPos - pos));
+ endOfWordPos++; // skip ';'
+
+ } else {
+
+ // it is not an entity - add entity for ampersand and the rest of string
+ addDoxyCommand(m_tokenList, "&");
+ m_tokenList.push_back(Token(PLAINSTRING,
+ line.substr(pos + 1, endOfWordPos - pos - 1)));
+ }
+ }
+ pos = endOfWordPos;
+}
+
+
+/*
+ * This method processes normal comment, which has to be tokenized.
+ */
+size_t DoxygenParser::processNormalComment(size_t pos, const std::string &line)
+{
+ switch (line[pos]) {
+ case '\\':
+ case '@':
+ if (processEscapedChars(pos, line)) {
+ break;
+ }
+ // handle word commands \arg, \c, \return, ... and \f[, \f$, ... commands
+ processWordCommands(pos, line);
+ break;
+
+ case ' ': // whitespace
+ case '\t': {
+ // whitespaces are stored as plain strings
+ size_t startOfNextWordPos = line.find_first_not_of(" \t", pos + 1);
+ m_tokenList.push_back(Token(PLAINSTRING,
+ line.substr(pos, startOfNextWordPos - pos)));
+ pos = startOfNextWordPos;
+ } break;
+
+ case '<':
+ processHtmlTags(pos, line);
+ break;
+ case '>': // this char is detected here only when it is not part of HTML tag
+ addDoxyCommand(m_tokenList, ">");
+ pos++;
+ break;
+ case '&':
+ processHtmlEntities(pos, line);
+ break;
+ case '"':
+ m_isInQuotedString = true;
+ m_tokenList.push_back(Token(PLAINSTRING, "\""));
+ pos++;
+ break;
+ default:
+ m_tokenListIt = m_tokenList.end();
+ printListError(WARN_DOXYGEN_COMMAND_ERROR, "Unknown special character: " + line[pos]);
+ }
+
+ return pos;
+}
+
+
+/*
+ * This is the main method, which tokenizes Doxygen comment to words and
+ * doxygen commands.
*/
void DoxygenParser::tokenizeDoxygenComment(const std::string &doxygenComment,
const std::string &fileName,
@@ -1315,18 +1280,23 @@ void DoxygenParser::tokenizeDoxygenComment(const std::string &doxygenComment,
}
pos = doxyCmdOrHtmlTagPos;
- if (pos != string::npos) {
- if (m_isVerbatimText) {
- pos = processVerbatimText(pos, line);
+ if (pos != string::npos)
+ {
+ if (m_isVerbatimText)
+ {
+ pos = processVerbatimText(pos, line);
+
} else if (m_isInQuotedString) {
- if (line[pos] == '"') {
- m_isInQuotedString = false;
- }
- m_tokenList.push_back(Token(PLAINSTRING,
+
+ if (line[pos] == '"') {
+ m_isInQuotedString = false;
+ }
+ m_tokenList.push_back(Token(PLAINSTRING,
line.substr(pos, 1)));
- pos++;
+ pos++;
+
} else {
- pos = processNormalComment(pos, line);
+ pos = processNormalComment(pos, line);
}
}
}
diff --git a/Source/DoxygenTranslator/src/DoxygenParser.h b/Source/DoxygenTranslator/src/DoxygenParser.h
index adc5c7f26..06c14de04 100644
--- a/Source/DoxygenTranslator/src/DoxygenParser.h
+++ b/Source/DoxygenTranslator/src/DoxygenParser.h
@@ -354,6 +354,12 @@ private:
/** Processes comment when \htmlonly and \verbatim commands are encountered. */
size_t processVerbatimText(size_t pos, const std::string &line);
+ bool processEscapedChars(size_t &pos, const std::string &line);
+ void processWordCommands(size_t &pos, const std::string &line);
+ void processHtmlTags(size_t &pos, const std::string &line);
+ void processHtmlEntities(size_t &pos, const std::string &line);
+
+
/** Processes comment outside \htmlonly and \verbatim commands. */
size_t processNormalComment(size_t pos, const std::string &line);
diff --git a/Source/DoxygenTranslator/src/JavaDocConverter.cpp b/Source/DoxygenTranslator/src/JavaDocConverter.cpp
index db5ba7e34..22a3ccc96 100644
--- a/Source/DoxygenTranslator/src/JavaDocConverter.cpp
+++ b/Source/DoxygenTranslator/src/JavaDocConverter.cpp
@@ -56,10 +56,10 @@ void JavaDocConverter::fillStaticTables() {
*
* entities must be translated - remain in Java, something meaningfull in Python (<, ...)
*
- * - whitespaces in tests
* - Python
- * - add comments also to auto-generated methods lilke equals(), delete() in Java,
+ * - add comments also to auto-generated methods like equals(), delete() in Java,
* and methods for std::vector(), ...
+ * Commenting methods of std types is simple - add comment to std_*.i file.
*/