diff --git a/Source/DoxygenTranslator/src/DoxygenCommands.h b/Source/DoxygenTranslator/src/DoxygenCommands.h index 10ebca862..154efc53f 100644 --- a/Source/DoxygenTranslator/src/DoxygenCommands.h +++ b/Source/DoxygenTranslator/src/DoxygenCommands.h @@ -2,10 +2,18 @@ #ifndef DOXYGENCOMMANDS_H #define DOXYGENCOMMANDS_H +const char *CMD_HTML_ONLY = "htmlonly"; +const char *CMD_VERBATIM = "verbatim"; +const char *CMD_END_HTML_ONLY = "endhtmlonly"; +const char *CMD_END_VERBATIM = "endverbatim"; + const char *sectionIndicators[] = { - "attention", "author", "authors", "brief", "bug", "cond", "date", "deprecated", "details", - "else", "elseif", "endcond", "endif", "exception", "if", "ifnot", "invariant", "note", "par", "param", - "tparam", "post", "pre", "remarks", "remark", "result", "return", "returns", "retval", "sa", "see", "since", "test", "throw", "throws", "todo", "version", "warning", "xrefitem"}; + "attention", "author", "authors", "brief", "bug", "cond", "date", + "deprecated", "details", "else", "elseif", "endcond", "endif", + "exception", "if", "ifnot", "invariant", "note", "par", "param", + "tparam", "post", "pre", "remarks", "remark", "result", "return", + "returns", "retval", "sa", "see", "since", "test", "throw", "throws", + "todo", "version", "warning", "xrefitem"}; const int sectionIndicatorsSize = sizeof(sectionIndicators) / sizeof(*sectionIndicators); /* All of the doxygen commands divided up by how they are parsed */ @@ -17,23 +25,27 @@ const char *simpleCommands[] = { const int simpleCommandsSize = sizeof(simpleCommands) / sizeof(*simpleCommands); const char *commandWords[] = { - "a", "b", "c", "e", "em", "p", "def", "enum", "package", "relates", "namespace", "relatesalso", "anchor", "dontinclude", "include", - "includelineno", "copydoc", "copybrief", "copydetails", "verbinclude", "htmlinclude", "extends", "implements", "memberof", "related", - "relatedalso", "cite"}; + "a", "b", "c", "e", "em", "p", "def", "enum", "package", "relates", + "namespace", "relatesalso", "anchor", "dontinclude", "include", + "includelineno", "copydoc", "copybrief", "copydetails", "verbinclude", + "htmlinclude", "extends", "implements", "memberof", "related", "relatedalso", + "cite"}; const int commandWordsSize = sizeof(commandWords) / sizeof(*commandWords); const char *commandLines[] = { - "addindex", "fn", "name", "line", "var", "skipline", "typedef", "skip", "until", "property"}; + "addindex", "fn", "name", "line", "var", "skipline", "typedef", "skip", + "until", "property"}; const int commandLinesSize = sizeof(commandLines) / sizeof(*commandLines); const char *commandParagraph[] = { - "partofdescription", "result", "return", "returns", "remarks", "remark", "since", "test", "sa", "see", "pre", "post", "details", "invariant", - "deprecated", "date", "note", "warning", "version", "todo", "bug", "attention", "brief", "author", "authors", - "copyright", "short"}; + "partofdescription", "result", "return", "returns", "remarks", "remark", + "since", "test", "sa", "see", "pre", "post", "details", "invariant", + "deprecated", "date", "note", "warning", "version", "todo", "bug", + "attention", "brief", "author", "authors", "copyright", "short"}; const int commandParagraphSize = sizeof(commandParagraph) / sizeof(*commandParagraph); const char *commandEndCommands[] = { - "htmlonly", "latexonly", "manonly", "xmlonly", "link", "rtfonly"}; + CMD_HTML_ONLY, "latexonly", "manonly", "xmlonly", "link", "rtfonly"}; const int commandEndCommandsSize = sizeof(commandEndCommands) / sizeof(*commandEndCommands); const char *commandWordParagraphs[] = { @@ -41,7 +53,8 @@ const char *commandWordParagraphs[] = { const int commandWordParagraphsSize = sizeof(commandWordParagraphs) / sizeof(*commandWordParagraphs); const char *commandWordLines[] = { - "page", "subsection", "subsubsection", "section", "paragraph", "defgroup", "snippet", "mainpage"}; + "page", "subsection", "subsubsection", "section", "paragraph", "defgroup", + "snippet", "mainpage"}; const int commandWordLinesSize = sizeof(commandWordLines) / sizeof(*commandWordLines); const char *commandWordOWordOWords[] = { @@ -61,7 +74,48 @@ const int commandErrorThrowingsSize = sizeof(commandErrorThrowings) / sizeof(*co const char *commandUniques[] = { "xrefitem", "arg", "ingroup", "par", "headerfile", "overload", "weakgroup", "ref", "subpage", "dotfile", "image", "addtogroup", "li", - "if", "ifnot", "elseif", "else", "mscfile", "code", "verbatim", "f{", "f[", "f$", "dot", "msc"}; + "if", "ifnot", "elseif", "else", "mscfile", "code", CMD_VERBATIM, "f{", "f[", "f$", "dot", "msc"}; const int commandUniquesSize = sizeof(commandUniques) / sizeof(*commandUniques); +// These HTML commands are transformed when producing output in other formats. +// Other commands are left intact, but '<' and '> are replaced with entities in HTML +// output. So appears as <varName> in HTML output. The same +// behavior must be repeated by SWIG. See Doxygen doc for the list of commands. +// '<' and '>' are used to differentiate HTML commands from doxygen commands. +const char *htmlCommands[] = { + "", "", "
", "", "
", "
", "", "", "", "", + "
", "
", "
", "", "
", "
", "

", "

", "

", "", "", "", + "
  • ", "", "", "
      ", "

      ", "

      ", "", "", "",
      +  "", "", "", "", "", "", "
        ", "" +}; + +const int htmlCommandsSize = sizeof(htmlCommands) / sizeof(*htmlCommands); + +// Only entities which are translatable to plain text are used here. Others +// are copied unchanged to output. +const char *htmlEntities[] = { "©", "&trade", "®", // (C), (TM), (R) +"<", // less-than symbol +">", // greater-than symbol +"&", // ampersand +"&apos", // single quotation mark (straight) +""", // double quotation mark (straight) +"&lsquo", // left single quotation mark +"&rsquo", // right single quotation mark +"&ldquo", // left double quotation mark +"&rdquo", // right double quotation mark +"&ndash", // n-dash (for numeric ranges, e.g. 2–8) +"&mdash", // -- +" ", // +"×", // x +"&minus", // - +"&sdot", // . +"&sim", // ~ +"&le", // <= +"&ge", // >= +"&larr", // <-- +"&rarr" // --> +}; + +const int htmlEntitiesSize = sizeof(htmlEntities) / sizeof(*htmlEntities); + #endif diff --git a/Source/DoxygenTranslator/src/DoxygenEntity.cpp b/Source/DoxygenTranslator/src/DoxygenEntity.cpp index 7b0088e06..9e761c32b 100644 --- a/Source/DoxygenTranslator/src/DoxygenEntity.cpp +++ b/Source/DoxygenTranslator/src/DoxygenEntity.cpp @@ -55,10 +55,10 @@ void DoxygenEntity::printEntity(int level) const { cout << "\t"; } - cout << "Node Command: " << typeOfEntity << " "; + cout << "Node Leaf Command: '" << typeOfEntity << "', "; if (!data.empty()) { - cout << "Node Data: " << data; + cout << "Node Data: '" << data << "'"; } cout << std::endl; @@ -68,7 +68,7 @@ void DoxygenEntity::printEntity(int level) const { cout << "\t"; } - cout << "Node Command : " << typeOfEntity << std::endl; + cout << "Node Command: '" << typeOfEntity << "'" << std::endl; thisLevel++; diff --git a/Source/DoxygenTranslator/src/DoxygenParser.cpp b/Source/DoxygenTranslator/src/DoxygenParser.cpp index 346e94cc2..7ec0ae261 100644 --- a/Source/DoxygenTranslator/src/DoxygenParser.cpp +++ b/Source/DoxygenTranslator/src/DoxygenParser.cpp @@ -16,6 +16,7 @@ #include #include +#include using std::string; using std::cout; @@ -28,7 +29,8 @@ std::set DoxygenParser::doxygenSectionIndicators; const int TOKENSPERLINE = 8; //change this to change the printing behaviour of the token list -DoxygenParser::DoxygenParser(bool noisy) : noisy(noisy) { +DoxygenParser::DoxygenParser(bool noisy) : noisy(noisy) +{ fillTables(); } @@ -36,37 +38,53 @@ DoxygenParser::~DoxygenParser() { } void DoxygenParser::fillTables() { - // run it only once - if (doxygenCommands.size()) - return; + // run it only once + if (doxygenCommands.size()) + return; - // fill in tables with data from DxygenCommands.h - for (int i = 0; i < simpleCommandsSize; i++) - doxygenCommands[simpleCommands[i]] = SIMPLECOMMAND; - for (int i = 0; i < commandWordsSize; i++) - doxygenCommands[commandWords[i]] = COMMANDWORD; - for (int i = 0; i < commandLinesSize; i++) - doxygenCommands[commandLines[i]] = COMMANDLINE; - for (int i = 0; i < commandParagraphSize; i++) - doxygenCommands[commandParagraph[i]] = COMMANDPARAGRAPH; - for (int i = 0; i < commandEndCommandsSize; i++) - doxygenCommands[commandEndCommands[i]] = COMMANDENDCOMMAND; - for (int i = 0; i < commandWordParagraphsSize; i++) - doxygenCommands[commandWordParagraphs[i]] = COMMANDWORDPARAGRAPH; - for (int i = 0; i < commandWordLinesSize; i++) - doxygenCommands[commandWordLines[i]] = COMMANDWORDLINE; - for (int i = 0; i < commandWordOWordOWordsSize; i++) - doxygenCommands[commandWordOWordOWords[i]] = COMMANDWORDOWORDWORD; - for (int i = 0; i < commandOWordsSize; i++) - doxygenCommands[commandOWords[i]] = COMMANDOWORD; - for (int i = 0; i < commandErrorThrowingsSize; i++) - doxygenCommands[commandErrorThrowings[i]] = COMMANDERRORTHROW; - for (int i = 0; i < commandUniquesSize; i++) - doxygenCommands[commandUniques[i]] = COMMANDUNIQUE; + // fill in tables with data from DxygenCommands.h + for (int i = 0; i < simpleCommandsSize; i++) + doxygenCommands[simpleCommands[i]] = SIMPLECOMMAND; - // fill section indicators command set - for (int i = 0; i < sectionIndicatorsSize; i++) - doxygenSectionIndicators.insert(sectionIndicators[i]); + for (int i = 0; i < commandWordsSize; i++) + doxygenCommands[commandWords[i]] = COMMANDWORD; + + for (int i = 0; i < commandLinesSize; i++) + doxygenCommands[commandLines[i]] = COMMANDLINE; + + for (int i = 0; i < commandParagraphSize; i++) + doxygenCommands[commandParagraph[i]] = COMMANDPARAGRAPH; + + for (int i = 0; i < commandEndCommandsSize; i++) + doxygenCommands[commandEndCommands[i]] = COMMANDENDCOMMAND; + + for (int i = 0; i < commandWordParagraphsSize; i++) + doxygenCommands[commandWordParagraphs[i]] = COMMANDWORDPARAGRAPH; + + for (int i = 0; i < commandWordLinesSize; i++) + doxygenCommands[commandWordLines[i]] = COMMANDWORDLINE; + + for (int i = 0; i < commandWordOWordOWordsSize; i++) + doxygenCommands[commandWordOWordOWords[i]] = COMMANDWORDOWORDWORD; + + for (int i = 0; i < commandOWordsSize; i++) + doxygenCommands[commandOWords[i]] = COMMANDOWORD; + + for (int i = 0; i < commandErrorThrowingsSize; i++) + doxygenCommands[commandErrorThrowings[i]] = COMMANDERRORTHROW; + + for (int i = 0; i < commandUniquesSize; i++) + doxygenCommands[commandUniques[i]] = COMMANDUNIQUE; + + for (int i = 0; i < htmlCommandsSize; i++) + doxygenCommands[htmlCommands[i]] = COMMANDUNIQUE; + + for (int i = 0; i < commandUniquesSize; i++) + doxygenCommands[commandUniques[i]] = COMMANDUNIQUE; + + // fill section indicators command set + for (int i = 0; i < sectionIndicatorsSize; i++) + doxygenSectionIndicators.insert(sectionIndicators[i]); } @@ -124,22 +142,58 @@ int DoxygenParser::commandBelongs(const std::string &theCommand) { } -std::string DoxygenParser::getNextWord(const TokenList &tokList) { - // MK Token nextToken = tokList.peek(); - if (m_tokenListIt == m_tokenList.end()) { +std::string DoxygenParser::trim(const std::string &text) +{ + size_t start = text.find_first_not_of(" \t"); + size_t end = text.find_last_not_of(" \t"); + + if (start == string::npos || start > end) { return ""; } - Token nextToken = *m_tokenListIt; - if (nextToken.m_tokenType == PLAINSTRING) { - // handle quoted strings as words - if (nextToken.m_tokenString[0] == '"' - && nextToken.m_tokenString[nextToken.m_tokenString.size() - 1] != '"') { + return text.substr(start, end - start + 1); +} - string word = nextToken.m_tokenString + " "; - nextToken = *m_tokenListIt++; + +bool DoxygenParser::isEndOfLine() +{ + if (m_tokenListIt == m_tokenList.end()) { + return false; + } + Token nextToken = *m_tokenListIt; + return nextToken.m_tokenType == END_LINE; +} + + +void DoxygenParser::skipWhitespaceTokens() +{ + if (m_tokenListIt == m_tokenList.end()) { + return; + } + + while (m_tokenListIt != m_tokenList.end() && + (m_tokenListIt->m_tokenType == END_LINE || trim(m_tokenListIt->m_tokenString).empty())) { + + m_tokenListIt++; + } +} + + +std::string DoxygenParser::getNextWord() { + +/* if (m_tokenListIt == m_tokenList.end()) { + return ""; + } +*/ + while (m_tokenListIt != m_tokenList.end() && (m_tokenListIt->m_tokenType == PLAINSTRING)) { + // handle quoted strings as words + if (m_tokenListIt->m_tokenString[0] == '"' + && m_tokenListIt->m_tokenString[m_tokenListIt->m_tokenString.size() - 1] != '"') { + + string word = m_tokenListIt->m_tokenString + " "; + m_tokenListIt++; while (true) { - string nextWord = getNextWord(tokList); - if (!nextWord.size()) {// maybe report unterminated string error + string nextWord = getNextWord(); + if (nextWord.empty()) { // maybe report unterminated string error return word; } word += nextWord; @@ -150,12 +204,60 @@ std::string DoxygenParser::getNextWord(const TokenList &tokList) { } } + string tokenStr = trim(m_tokenListIt->m_tokenString); m_tokenListIt++; - return nextToken.m_tokenString; - } + if (!tokenStr.empty()) { + return tokenStr; + } + } /* else if (nextToken.m_tokenType == END_LINE) { + // this handles cases when command is the last item in line, for example: + // * This method returns line number \c + // * relative to paragraph. + m_tokenListIt++; + return getNextWord(); + } */ + return ""; } +/* TODO remove this m. +std::string DoxygenParser::getNextWordInComment() { + + while (m_tokenListIt != m_tokenList.end() && (m_tokenListIt->m_tokenType == PLAINSTRING || m_tokenListIt->m_tokenType == END_LINE)) { + // handle quoted strings as words + if (m_tokenListIt->m_tokenString[0] == '"' + && m_tokenListIt->m_tokenString[m_tokenListIt->m_tokenString.size() - 1] != '"') { + + string word = m_tokenListIt->m_tokenString + " "; + while (true) { + string nextWord = getNextWord(); + if (nextWord.empty()) {// maybe report unterminated string error + return word; + } + word += nextWord; + if (word[word.size() - 1] == '"') { // strip quotes + return word.substr(1, word.size() - 2); + } + word += " "; + } + } + + string tokenStr = trim(m_tokenListIt->m_tokenString); + m_tokenListIt++; + if (!tokenStr.empty()) { + return tokenStr; + } + } * else if (nextToken.m_tokenType == END_LINE) { + // this handles cases when command is the last item in line, for example: + // * This method returns line number \c + // * relative to paragraph. + m_tokenListIt++; + return getNextWord(); + } * + + return ""; +} */ + DoxygenParser::TokenListCIt DoxygenParser::getOneLine(const TokenList &tokList) { @@ -183,7 +285,7 @@ std::string DoxygenParser::getStringTilCommand(const TokenList & tokList) { while (m_tokenListIt->m_tokenType == PLAINSTRING) { const Token ¤tToken = *m_tokenListIt++; if (currentToken.m_tokenType == PLAINSTRING) { - description = description + currentToken.m_tokenString + " "; + description = description + currentToken.m_tokenString; // + " "; } } return description; @@ -201,12 +303,10 @@ std::string DoxygenParser::getStringTilEndCommand(const std::string & theCommand //TODO: it won't output doxygen commands, need a way to fix it if (m_tokenListIt->m_tokenType == PLAINSTRING) { - description += m_tokenListIt->m_tokenString + " "; + description += m_tokenListIt->m_tokenString; } else if (m_tokenListIt->m_tokenType == END_LINE) { description += "\n"; - } - - if (m_tokenListIt->m_tokenString == theCommand) { + } else if (m_tokenListIt->m_tokenString == theCommand) { m_tokenListIt++; return description; } @@ -222,32 +322,33 @@ std::string DoxygenParser::getStringTilEndCommand(const std::string & theCommand DoxygenParser::TokenListCIt DoxygenParser::getEndOfParagraph(const TokenList & tokList) { - TokenListCIt endOfParagraph = m_tokenListIt; + TokenListCIt endOfParagraph = m_tokenListIt; - while (endOfParagraph != tokList.end()) { - if ((*endOfParagraph).m_tokenType == END_LINE) { - endOfParagraph++; - if (endOfParagraph != tokList.end() && (*endOfParagraph).m_tokenType == END_LINE) { - endOfParagraph++; - //cout << "ENCOUNTERED END OF PARA" << endl; - return endOfParagraph; - } + while (endOfParagraph != tokList.end()) { + if (endOfParagraph->m_tokenType == END_LINE) { + endOfParagraph++; + if (endOfParagraph != tokList.end() && endOfParagraph->m_tokenType == END_LINE) { + endOfParagraph++; + //cout << "ENCOUNTERED END OF PARA" << endl; + return endOfParagraph; + } - } else if ((*endOfParagraph).m_tokenType == COMMAND) { + } else if (endOfParagraph->m_tokenType == COMMAND) { - if (isSectionIndicator((*endOfParagraph).m_tokenString)) { - return endOfParagraph; - } else - endOfParagraph++; + if (isSectionIndicator(endOfParagraph->m_tokenString)) { + return endOfParagraph; + } else { + endOfParagraph++; + } - } else if ((*endOfParagraph).m_tokenType == PLAINSTRING) { - endOfParagraph++; - } else { - return tokList.end(); - } - } + } else if (endOfParagraph->m_tokenType == PLAINSTRING) { + endOfParagraph++; + } else { + return tokList.end(); + } + } - return tokList.end(); + return tokList.end(); } @@ -257,16 +358,16 @@ DoxygenParser::TokenListCIt DoxygenParser::getEndOfSection(const std::string & t TokenListCIt endOfParagraph = m_tokenListIt; while (endOfParagraph != tokList.end()) { - if ((*endOfParagraph).m_tokenType == COMMAND) { - if (theCommand == (*endOfParagraph).m_tokenString) + if (endOfParagraph->m_tokenType == COMMAND) { + if (theCommand == endOfParagraph->m_tokenString) return endOfParagraph; else endOfParagraph++; - } else if ((*endOfParagraph).m_tokenType == PLAINSTRING) { + } else if (endOfParagraph->m_tokenType == PLAINSTRING) { endOfParagraph++; - } else if ((*endOfParagraph).m_tokenType == END_LINE) { + } else if (endOfParagraph->m_tokenType == END_LINE) { endOfParagraph++; - if ((*endOfParagraph).m_tokenType == END_LINE) { + if (endOfParagraph->m_tokenType == END_LINE) { endOfParagraph++; return endOfParagraph; } @@ -293,11 +394,14 @@ DoxygenParser::TokenListCIt DoxygenParser::getEndCommand(const std::string & the return tokList.end(); } -/* DoxygenParser::TokenListIt DoxygenParser::getTilAnyCommand(const std::string &, - TokenList &) { - TokenListIt anIterator; - return anIterator; -} */ + +void DoxygenParser::skipEndOfLine() +{ + if (m_tokenListIt != m_tokenList.end() && + m_tokenListIt->m_tokenType == END_LINE) { + m_tokenListIt++; + } +} int DoxygenParser::addSimpleCommand(const std::string &theCommand, @@ -311,19 +415,25 @@ int DoxygenParser::addSimpleCommand(const std::string &theCommand, int DoxygenParser::addCommandWord(const std::string &theCommand, - const TokenList &tokList, + const TokenList &, DoxygenEntityList &doxyList) { if (noisy) cout << "Parsing " << theCommand << endl; - std::string name = getNextWord(tokList); - if (!name.empty()) { - DoxygenEntityList aNewList; - aNewList.push_back(DoxygenEntity("plainstd::string", name)); - doxyList.push_back(DoxygenEntity(theCommand, aNewList)); - return 1; + if (isEndOfLine()) { + // handles cases when command is at the end of line (for example "\c\nreally" + skipWhitespaceTokens(); + doxyList.push_back(DoxygenEntity("plainstd::endl")); + } + std::string name = getNextWord(); + if (!name.empty()) { + DoxygenEntityList aNewList; + aNewList.push_back(DoxygenEntity("plainstd::string", name)); + doxyList.push_back(DoxygenEntity(theCommand, aNewList)); + return 1; } else { - printListError(WARN_DOXYGEN_COMMAND_ERROR, "No word followed " + theCommand + " command. Not added"); + printListError(WARN_DOXYGEN_COMMAND_ERROR, "No word followed " + + theCommand + " command. Not added"); } return 0; } @@ -337,6 +447,7 @@ int DoxygenParser::addCommandLine(const std::string &theCommand, TokenListCIt endOfLine = getOneLine(tokList); DoxygenEntityList aNewList = parse(endOfLine, tokList); doxyList.push_back(DoxygenEntity(theCommand, aNewList)); + skipEndOfLine(); return 1; } @@ -374,31 +485,32 @@ int DoxygenParser::addCommandEndCommand(const std::string &theCommand, int DoxygenParser::addCommandWordParagraph(const std::string &theCommand, - const TokenList &tokList, - DoxygenEntityList &doxyList) { - if (noisy) - cout << "Parsing " << theCommand << endl; + const TokenList &tokList, + DoxygenEntityList &doxyList) { + if (noisy) + cout << "Parsing " << theCommand << endl; - std::string name = getNextWord(tokList); + std::string name = getNextWord(); - if (name.empty()) { - printListError(WARN_DOXYGEN_COMMAND_ERROR, "No word followed " + theCommand + " command. Not added"); - return 0; - } - TokenListCIt endOfParagraph = getEndOfParagraph(tokList); - DoxygenEntityList aNewList; - aNewList = parse(endOfParagraph, tokList); - aNewList.push_front(DoxygenEntity("plainstd::string", name)); - doxyList.push_back(DoxygenEntity(theCommand, aNewList)); - return 1; + if (name.empty()) { + printListError(WARN_DOXYGEN_COMMAND_ERROR, "No word followed " + theCommand + " command. Not added"); + return 0; + } + TokenListCIt endOfParagraph = getEndOfParagraph(tokList); + DoxygenEntityList aNewList; + aNewList = parse(endOfParagraph, tokList); + aNewList.push_front(DoxygenEntity("plainstd::string", name)); + doxyList.push_back(DoxygenEntity(theCommand, aNewList)); + return 1; } + int DoxygenParser::addCommandWordLine(const std::string &theCommand, const TokenList & tokList, DoxygenEntityList &doxyList) { if (noisy) cout << "Parsing " << theCommand << endl; - std::string name = getNextWord(tokList); + std::string name = getNextWord(); if (name.empty()) { printListError(WARN_DOXYGEN_COMMAND_ERROR, "No word followed " + theCommand + " command. Not added"); return 0; @@ -413,19 +525,20 @@ int DoxygenParser::addCommandWordLine(const std::string &theCommand, //else cout << "No line followed " << theCommand << " command. Not added" << endl; } + int DoxygenParser::addCommandWordOWordOWord(const std::string &theCommand, - const TokenList &tokList, + const TokenList &, DoxygenEntityList &doxyList) { if (noisy) cout << "Parsing " << theCommand << endl; - std::string name = getNextWord(tokList); + std::string name = getNextWord(); if (name.empty()) { printListError(WARN_DOXYGEN_COMMAND_ERROR, "No word followed " + theCommand + " command. Not added"); return 0; } - std::string headerfile = getNextWord(tokList); - std::string headername = getNextWord(tokList); + std::string headerfile = getNextWord(); + std::string headername = getNextWord(); DoxygenEntityList aNewList; aNewList.push_back(DoxygenEntity("plainstd::string", name)); if (!headerfile.empty()) @@ -438,12 +551,12 @@ int DoxygenParser::addCommandWordOWordOWord(const std::string &theCommand, int DoxygenParser::addCommandOWord(const std::string &theCommand, - const TokenList &tokList, + const TokenList &, DoxygenEntityList &doxyList) { if (noisy) cout << "Parsing " << theCommand << endl; - std::string name = getNextWord(tokList); + std::string name = getNextWord(); DoxygenEntityList aNewList; aNewList.push_back(DoxygenEntity("plainstd::string", name)); doxyList.push_back(DoxygenEntity(theCommand, aNewList)); @@ -478,17 +591,17 @@ int DoxygenParser::addCommandUnique(const std::string &theCommand, else if (theCommand == "xrefitem") { if (noisy) cout << "Parsing " << theCommand << endl; - std::string key = getNextWord(tokList); + std::string key = getNextWord(); if (key.empty()) { printListError(WARN_DOXYGEN_COMMAND_ERROR, "No key followed " + theCommand + " command. Not added"); return 0; } - std::string heading = getNextWord(tokList); + std::string heading = getNextWord(); if (key.empty()) { printListError(WARN_DOXYGEN_COMMAND_ERROR, "No heading followed " + theCommand + " command. Not added"); return 0; } - std::string title = getNextWord(tokList); + std::string title = getNextWord(); if (title.empty()) { printListError(WARN_DOXYGEN_COMMAND_ERROR, "No title followed " + theCommand + " command. Not added"); return 0; @@ -503,12 +616,12 @@ int DoxygenParser::addCommandUnique(const std::string &theCommand, } // \ingroup ( [ ]) else if (theCommand == "ingroup") { - std::string name = getNextWord(tokList); + std::string name = getNextWord(); aNewList.push_back(DoxygenEntity("plainstd::string", name)); - name = getNextWord(tokList); + name = getNextWord(); if (!name.empty()) aNewList.push_back(DoxygenEntity("plainstd::string", name)); - name = getNextWord(tokList); + name = getNextWord(); if (!name.empty()) aNewList.push_back(DoxygenEntity("plainstd::string", name)); doxyList.push_back(DoxygenEntity(theCommand, aNewList)); @@ -528,9 +641,9 @@ int DoxygenParser::addCommandUnique(const std::string &theCommand, // \headerfile [] else if (theCommand == "headerfile") { DoxygenEntityList aNewList; - std::string name = getNextWord(tokList); + std::string name = getNextWord(); aNewList.push_back(DoxygenEntity("plainstd::string", name)); - name = getNextWord(tokList); + name = getNextWord(); if (!name.empty()) aNewList.push_back(DoxygenEntity("plainstd::string", name)); doxyList.push_back(DoxygenEntity(theCommand, aNewList)); @@ -551,7 +664,7 @@ int DoxygenParser::addCommandUnique(const std::string &theCommand, else if (theCommand == "weakgroup") { if (noisy) cout << "Parsing " << theCommand << endl; - std::string name = getNextWord(tokList); + std::string name = getNextWord(); if (name.empty()) { printListError(WARN_DOXYGEN_COMMAND_ERROR, "No word followed " + theCommand + " command. Not added"); return 0; @@ -568,12 +681,12 @@ int DoxygenParser::addCommandUnique(const std::string &theCommand, else if (theCommand == "ref") { if (noisy) cout << "Parsing " << theCommand << endl; - std::string name = getNextWord(tokList); + std::string name = getNextWord(); if (name.empty()) { printListError(WARN_DOXYGEN_COMMAND_ERROR, "No key followed " + theCommand + " command. Not added"); return 0; } - std::string text = getNextWord(tokList); + std::string text = getNextWord(); aNewList.push_back(DoxygenEntity("plainstd::string", name)); if (!text.empty()) aNewList.push_back(DoxygenEntity("plainstd::string", text)); @@ -583,12 +696,12 @@ int DoxygenParser::addCommandUnique(const std::string &theCommand, else if (theCommand == "subpage") { if (noisy) cout << "Parsing " << theCommand << endl; - std::string name = getNextWord(tokList); + std::string name = getNextWord(); if (name.empty()) { printListError(WARN_DOXYGEN_COMMAND_ERROR, "No name followed " + theCommand + " command. Not added"); return 0; } - std::string text = getNextWord(tokList); + std::string text = getNextWord(); aNewList.push_back(DoxygenEntity("plainstd::string", name)); if (!text.empty()) aNewList.push_back(DoxygenEntity("plainstd::string", text)); @@ -632,12 +745,12 @@ int DoxygenParser::addCommandUnique(const std::string &theCommand, else if (theCommand == "dotfile" || theCommand == "mscfile") { if (noisy) cout << "Parsing " << theCommand << endl; - std::string file = getNextWord(tokList); + std::string file = getNextWord(); if (file.empty()) { printListError(WARN_DOXYGEN_COMMAND_ERROR, "No file followed " + theCommand + " command. Not added"); return 0; } - std::string caption = getNextWord(tokList); + std::string caption = getNextWord(); aNewList.push_back(DoxygenEntity("plainstd::string", file)); if (!caption.empty()) aNewList.push_back(DoxygenEntity("plainstd::string", caption)); @@ -647,18 +760,18 @@ int DoxygenParser::addCommandUnique(const std::string &theCommand, else if (theCommand == "image") { if (noisy) cout << "Parsing " << theCommand << endl; - std::string format = getNextWord(tokList); + std::string format = getNextWord(); if (format.empty()) { printListError(WARN_DOXYGEN_COMMAND_ERROR, "No format followed " + theCommand + " command. Not added"); return 0; } - std::string file = getNextWord(tokList); + std::string file = getNextWord(); if (file.empty()) { printListError(WARN_DOXYGEN_COMMAND_ERROR, "No name followed " + theCommand + " command. Not added"); return 0; } - std::string caption = getNextWord(tokList); - std::string size = getNextWord(tokList); + std::string caption = getNextWord(); + std::string size = getNextWord(); DoxygenEntityList aNewList; aNewList.push_back(DoxygenEntity("plainstd::string", format)); @@ -673,9 +786,9 @@ int DoxygenParser::addCommandUnique(const std::string &theCommand, else if (theCommand == "addtogroup") { if (noisy) cout << "Parsing " << theCommand << endl; - std::string name = getNextWord(tokList); + std::string name = getNextWord(); if (name.empty()) { - printListError(WARN_DOXYGEN_COMMAND_ERROR, "No word followed " + theCommand + " command. Not added"); + printListError(WARN_DOXYGEN_COMMAND_ERROR, "There should be at least one word following the '" + theCommand + "' command. Command ignored."); return 0; } DoxygenEntityList aNewList; @@ -685,6 +798,7 @@ int DoxygenParser::addCommandUnique(const std::string &theCommand, } aNewList.push_front(DoxygenEntity("plainstd::string", name)); doxyList.push_back(DoxygenEntity(theCommand, aNewList)); + skipEndOfLine(); } // \if [\else ...] [\elseif ...] \endif else if (theCommand == "if" || theCommand == "ifnot" || @@ -696,7 +810,7 @@ int DoxygenParser::addCommandUnique(const std::string &theCommand, bool skipEndif = false; // if true then we skip endif after parsing block of code bool needsCond = (theCommand == "if" || theCommand == "ifnot" || theCommand == "elseif"); if (needsCond) { - cond = getNextWord(tokList); + cond = getNextWord(); if (cond.empty()) { printListError(WARN_DOXYGEN_COMMAND_ERROR, "No word followed " + theCommand + " command. Not added"); return 0; @@ -755,6 +869,7 @@ int DoxygenParser::addCommand(const std::string &commandString, doxyList.push_back(DoxygenEntity("plainstd::string", nextPhrase)); return 1; } + switch (commandBelongs(theCommand)) { case SIMPLECOMMAND: return addSimpleCommand(theCommand, doxyList); @@ -824,13 +939,13 @@ DoxygenEntityList DoxygenParser::createTree(const std::string &doxygenBlob, const std::string &fileName, int lineNumber) { - TokenList tokList = tokenizeDoxygenComment(doxygenBlob, fileName, lineNumber); + tokenizeDoxygenComment(doxygenBlob, fileName, lineNumber); if (noisy) { cout << "---TOKEN LIST---" << endl; printList(); } - DoxygenEntityList rootList = parse(tokList.end(), tokList, true); + DoxygenEntityList rootList = parse(m_tokenList.end(), m_tokenList, true); if (noisy) { cout << "PARSED LIST" << endl; @@ -842,8 +957,8 @@ DoxygenEntityList DoxygenParser::createTree(const std::string &doxygenBlob, /** * This is one of the most important methods - it breaks the original - * doxygen comment into tokens. - */ + * doxygen comment into tokens - one token per word. + * See replacement, which also handles html comments below. DoxygenParser::TokenList DoxygenParser::tokenizeDoxygenComment(const std::string &doxygenComment, const std::string &fileName, int fileLine) { @@ -860,6 +975,14 @@ DoxygenParser::TokenList DoxygenParser::tokenizeDoxygenComment(const std::string pos = doxygenComment.find_first_of("\\@\t\n ", lastPos); if (pos == string::npos) { pos = doxygenComment.size(); + // } else { + // preserve whitespaces + // while (pos != string::npos && (doxygenComment[pos] == ' ' || doxygenComment[pos] == '\t')) { + // pos++; + // } + // if (pos == string::npos) { + // pos = doxygenComment.size(); + // } } currentWord = doxygenComment.substr(lastPos, pos-lastPos); @@ -875,7 +998,7 @@ DoxygenParser::TokenList DoxygenParser::tokenizeDoxygenComment(const std::string currentWord += doxygenComment[pos]; pos++; } - // also strip the command till the first nonalpha char + // also strip the command till the first non-alpha char for (size_t i = 2; i < currentWord.size(); i++) { if (!isalpha(currentWord[i])) { currentWord = currentWord.substr(0, i); @@ -920,6 +1043,213 @@ DoxygenParser::TokenList DoxygenParser::tokenizeDoxygenComment(const std::string return tokList; } + */ + + +// Splits 'text' on 'separator' chars. Separator chars are not part of the strings. +DoxygenParser::StringVector DoxygenParser::split(const std::string &text, char separator) +{ + StringVector lines; + size_t prevPos = 0, pos = 0; + + while (pos < string::npos) { + pos = text.find(separator, prevPos); + lines.push_back(text.substr(prevPos, pos - prevPos)); + prevPos = pos + 1; + } + + return lines; +} + + +bool DoxygenParser::isStartOfDoxyCommentChar(char c) +{ + return (strchr("*/!", c) != NULL); +} + + +void DoxygenParser::addDoxyCommand(DoxygenParser::TokenList &tokList, + const std::string &cmd) { + if (findCommand(cmd)) { + tokList.push_back(Token(COMMAND, cmd)); + } else { + // Unknown commands are ignored, because they are + // also ignored by Doxygen - see test doxygen_misc_constructs.h, f. backslashB(). + // This differs from original implementation in this class. Uncomment + // the line below to put unknown commands to output. + // tokList.push_back(Token(PLAINSTRING, cmd)); + } +} + + +size_t DoxygenParser::processVerbatimText(size_t pos, const std::string &line) +{ + if (line[pos] == '\\' || line[pos] == '@') { + pos++; + // characters '$[]{}' are used in commands \f$, \f[, ... + size_t endOfWordPos = line.find_first_not_of("abcdefghijklmnopqrstuvwxyz$[]{}", pos); + string cmd = line.substr(pos , endOfWordPos - pos); + + if (cmd == CMD_END_HTML_ONLY || cmd == CMD_END_VERBATIM) { + m_isVerbatimText = false; + addDoxyCommand(m_tokenList, cmd); + } else { + cmd = line[pos] + cmd; // prepend '\\' or '@' + m_tokenList.push_back(Token(PLAINSTRING, + line.substr(pos, endOfWordPos - pos))); + } + pos = endOfWordPos; + } else { + // whitespaces are stored as plain strings + size_t startOfPossibleEndCmd = line.find_first_of("\\@", pos); + m_tokenList.push_back(Token(PLAINSTRING, + line.substr(pos, startOfPossibleEndCmd - pos))); + pos = startOfPossibleEndCmd; + } + + return pos; +} + + +size_t DoxygenParser::processNormalComment(size_t pos, const std::string &line) +{ + switch (line[pos]) { + case '\\': // process doxy command or escaped char + /* switch (line[pos + 1]) { + case '$': + case '@': + case '\': + case '&': + case '~': + case '<': + case '>': + case '#': \% \" \. \:: + } + TODO break case if any of escaped chars */ + case '@': { + pos++; + // characters '$[]{}' are used in commands \f$, \f[, ... + size_t endOfWordPos = line.find_first_not_of("abcdefghijklmnopqrstuvwxyz$[]{}", pos); + string cmd = line.substr(pos , endOfWordPos - pos); + addDoxyCommand(m_tokenList, cmd); + if (cmd == CMD_HTML_ONLY || cmd == CMD_VERBATIM) { + m_isVerbatimText = true; + } + // skip any possible spaces after command, because some commands have parameters, + // and spaces between command and parameter must be ignored. + if (endOfWordPos != string::npos) { + pos = line.find_first_not_of(" \t", endOfWordPos); + } else { + pos = string::npos; + } + } break; + + case ' ': // whitespace + case '\t': { + // whitespaces are stored as plain strings + size_t startOfNextWordPos = line.find_first_not_of(" \t", pos + 1); + m_tokenList.push_back(Token(PLAINSTRING, + line.substr(pos, startOfNextWordPos - pos))); + pos = startOfNextWordPos; + } break; + + case '<': { // process html commands + + size_t endHtmlPos = line.find_first_of("\t >", pos + 1); + if (endHtmlPos != string::npos) { + // will push plain string Token. If the command is not HTML supported by + // Doxygen, < and > will be replaced by HTML entities < and > respectively, + // but only if 'htmlOnly' flag == false. The flag is set/reset by \htmlonly \verbatim, + // \endhtmlonly \endverbatim Doxygen commands. + // handleHTMLCommand(line.substr(pos + 1), endHtmlPos - pos - 1); + } + pos = endHtmlPos; + } break; + + case '&': { // process HTML entities + size_t endOfWordPos = line.find_first_not_of("abcdefghijklmnopqrstuvwxyz", pos + 1); + if (endOfWordPos != string::npos) { + if (line[endOfWordPos] == ';') { + addDoxyCommand(m_tokenList, line.substr(pos, endOfWordPos)); + } else { + // it is not an entity - push plain string + m_tokenList.push_back(Token(PLAINSTRING, + line.substr(pos, endOfWordPos - pos))); + pos = endOfWordPos; + } + } else { + pos = string::npos; + } + } + break; + default: + printListError(WARN_DOXYGEN_COMMAND_ERROR, "Unknown special character: " + line[pos]); + } + + return pos; +} + + +/** + * This method tokenizes Doxygen comment to words and doxygen commands. + */ +void DoxygenParser::tokenizeDoxygenComment(const std::string &doxygenComment, + const std::string &fileName, + int fileLine) +{ + m_isVerbatimText = false; + m_tokenList.clear(); + m_fileLineNo = fileLine; + m_fileName = fileName; + + StringVector lines = split(doxygenComment, '\n'); + + for (StringVectorCIt it = lines.begin(); it != lines.end(); it++) { + const string &line = *it; + size_t pos = line.find_first_not_of(" \t"); + + // skip sequences of '*', '/', and '!' of any length + while (pos != string::npos && isStartOfDoxyCommentChar(line[pos])) { + pos++; + } + + if (pos == string::npos) { + m_tokenList.push_back(Token(END_LINE, "\n")); + continue; + } + + // line[pos] may be ' \t' or start of word, it there was no '*', '/' or '!' + // at beginning of the line. Make sure it points to start of the first word + // in the line. + pos = line.find_first_not_of(" \t", pos); + if (pos == string::npos) { + m_tokenList.push_back(Token(END_LINE, "\n")); + continue; + } + + while (pos != string::npos) { + // find the end of the word + size_t doxyCmdOrHtmlTagPos = line.find_first_of("\\@< \t", pos); + if (doxyCmdOrHtmlTagPos != pos) { + // plain text found + m_tokenList.push_back(Token(PLAINSTRING, + line.substr(pos, doxyCmdOrHtmlTagPos - pos))); + } + + pos = doxyCmdOrHtmlTagPos; + if (pos != string::npos) { + if (m_isVerbatimText) { + pos = processVerbatimText(pos, line); + } else { + pos = processNormalComment(pos, line); + } + } + } + m_tokenList.push_back(Token(END_LINE, "\n")); // add when pos == npos - end of line + } + + m_tokenListIt = m_tokenList.begin(); +} void DoxygenParser::printList() { diff --git a/Source/DoxygenTranslator/src/DoxygenParser.h b/Source/DoxygenTranslator/src/DoxygenParser.h index 6e2c539ce..019da8785 100644 --- a/Source/DoxygenTranslator/src/DoxygenParser.h +++ b/Source/DoxygenTranslator/src/DoxygenParser.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include "DoxygenEntity.h" @@ -67,7 +68,7 @@ private: }; - typedef std::list TokenList; + typedef std::vector TokenList; typedef TokenList::const_iterator TokenListCIt; typedef TokenList::iterator TokenListIt; @@ -84,6 +85,8 @@ private: static DoxyCommandsMap doxygenCommands; static std::set doxygenSectionIndicators; + bool m_isVerbatimText; // used to handle \htmlonly and \verbatim commands + std::string m_fileName; int m_fileLineNo; @@ -119,12 +122,34 @@ private: */ void printTree(const std::list &rootList); + /** + * Returns true if the next token is end of line token. This is important + * when single word commands like \c are at the end of line. + */ + bool isEndOfLine(); + + /** + * Skips spaces, tabs, and end of line tokens. + */ + void skipWhitespaceTokens(); + + /** + * Removes all spaces and tabs from beginning end end of string. + */ + std::string trim(const std::string &text); + /* * Returns the next word ON THE CURRENT LINE ONLY * if a new line is encountered, returns a blank std::string. - * Updates the index it is given if success. + * Updates the iterator if successful. */ - std::string getNextWord(const TokenList &tokList); + std::string getNextWord(); + + /* + * Returns the next word, which is not necessarily on the same line. + * Updates the iterator if successful. + */ + std::string getNextWordInComment(); /* * Returns the location of the end of the line as @@ -178,6 +203,16 @@ private: const TokenList &tokList); */ + /** + * This methods skips end of line token, if it is the next token to be + * processed. It is called with comment commands which have args till the + * end of line, such as 'addtogroup' or 'addindex'. + * It is up to translator to specific language to decide whether + * to insert eol or not. For example, if a command is ignored in target + * language, new lines may make formatting ugly (Python). + */ + void skipEndOfLine(); + /* * Method for Adding a Simple Command * Format: @command @@ -298,13 +333,26 @@ private: * Fill static doxygenCommands and sectionIndicators containers */ void fillTables(); - - TokenList tokenizeDoxygenComment(const std::string &doxygenComment, - const std::string &fileName, - int fileLine); + + /** Processes comment when \htmlonly and \verbatim commands are encountered. */ + size_t processVerbatimText(size_t pos, const std::string &line); + + /** Processes comment outside \htmlonly and \verbatim commands. */ + size_t processNormalComment(size_t pos, const std::string &line); + + void tokenizeDoxygenComment(const std::string &doxygenComment, + const std::string &fileName, + int fileLine); void printList(); void printListError(int warningType, const std::string &message); + typedef std::vector StringVector; + typedef StringVector::const_iterator StringVectorCIt; + + StringVector split(const std::string &text, char separator); + bool isStartOfDoxyCommentChar(char c); + void addDoxyCommand(DoxygenParser::TokenList &tokList, const std::string &cmd); + public: DoxygenParser(bool noisy = false); virtual ~DoxygenParser(); diff --git a/Source/DoxygenTranslator/src/JavaDocConverter.cpp b/Source/DoxygenTranslator/src/JavaDocConverter.cpp index 83b08ba39..8527ed689 100644 --- a/Source/DoxygenTranslator/src/JavaDocConverter.cpp +++ b/Source/DoxygenTranslator/src/JavaDocConverter.cpp @@ -30,6 +30,41 @@ void JavaDocConverter::fillStaticTables() { if (tagHandlers.size()) // fill only once return; + /* + * Some translation rules: + * + * @ and \ must be escaped for both Java and Python to appear on output: \@, \\, + * while Doxygen produces output in both cases. + * Rule: @ and \ with space on the right should get to output. + * + * :: remains intact, even in class::method(). But you can use class#method also + * in C++ comment and it is properly translated to C++ output (changed by doxygen to ::) + * and Java output (remains #). + * Rule: SWIG type system can't be used to convert C::m to C#m, because in Java it is C.m + * Use string replacement :: --> # in tag see and links. + * + * HTML tags must be translated - remain in Java, to markdown in Python + * + * Unknown HTML tags, for example is translated to <x> by doxygen, while + * Java src is and therefore invisible on output - browser ignores unknown command. + * This is handy in syntax descriptions, for example: more . + * + * Standlaone < and > need not to be translated, they are rendered properly in + * all three outputs. + * + * ., %, and " need not to be translated + * + * entities must be translated - remain in Java, something meaningfull in Python (<, ...) + * + * \e at end of line freezes doxygen + * + * - enum inside class is missing comment + * - '\' not representing doxygen commands + * - add comments also to auto-generated methods lilke equals(), delete() in Java, + * and methods for std::vector(), ... + */ + + // these commands insert HTML tags tagHandlers["a"] = make_pair(&JavaDocConverter::handleTagHtml, "i"); tagHandlers["arg"] = make_pair(&JavaDocConverter::handleTagHtml, "li"); @@ -172,8 +207,11 @@ std::string JavaDocConverter::formatCommand(std::string unformattedLine, /** - * Returns true, if the given parameter exists in the current node. If feature - * 'doxygen:nostripparams' is set, then this method always returns true. + * Returns true, if the given parameter exists in the current node + * (for example param is a name of function parameter). If feature + * 'doxygen:nostripparams' is set, then this method always returns + * true - parameters are copied to output regardless of presence in + * function params list. */ bool JavaDocConverter::paramExists(std::string param) { @@ -276,8 +314,8 @@ void JavaDocConverter::handleParagraph(DoxygenEntity& tag, std::string& translat void JavaDocConverter::handlePlainString(DoxygenEntity& tag, std::string& translatedComment, std::string&) { translatedComment += tag.data; - if (tag.data.size() && tag.data[tag.data.size()-1] != ' ') - translatedComment += " "; + // if (tag.data.size() && tag.data[tag.data.size()-1] != ' ') + // translatedComment += " "; } @@ -345,8 +383,11 @@ void JavaDocConverter::handleTagPar(DoxygenEntity& tag, std::string& translatedC } -void JavaDocConverter::handleTagParam(DoxygenEntity& tag, std::string& translatedComment, std::string&) { +void JavaDocConverter::handleTagParam(DoxygenEntity& tag, + std::string& translatedComment, + std::string&) { std::string dummy; + if (!tag.entityList.size()) return; if (!paramExists(tag.entityList.begin()->data)) @@ -356,6 +397,7 @@ void JavaDocConverter::handleTagParam(DoxygenEntity& tag, std::string& translate translatedComment += tag.entityList.begin()->data + " "; tag.entityList.pop_front(); handleParagraph(tag, translatedComment, dummy); + printf("cmd: %s\n", translatedComment.c_str()); } diff --git a/Source/DoxygenTranslator/src/JavaDocConverter.h b/Source/DoxygenTranslator/src/JavaDocConverter.h index 9715593c6..2c3ac72f0 100644 --- a/Source/DoxygenTranslator/src/JavaDocConverter.h +++ b/Source/DoxygenTranslator/src/JavaDocConverter.h @@ -35,7 +35,7 @@ protected: */ std::string translateSubtree(DoxygenEntity & doxygenEntity); /* - * Translate one entity with the appropriate handler, acording + * Translate one entity with the appropriate handler, according * to the tagHandlers */ void translateEntity(DoxygenEntity &tag, std::string &translatedComment);
      ", "", "