]> gitweb.hamatoma.de Git - reqt/commitdiff
recform: initial state
authorhama <hama@siduction.net>
Sun, 27 Dec 2015 22:53:32 +0000 (23:53 +0100)
committerhama <hama@siduction.net>
Sun, 27 Dec 2015 22:53:32 +0000 (23:53 +0100)
19 files changed:
appl/recform/CFormatter.cpp
appl/recform/CFormatter.hpp
appl/recform/CppDefinitions.hpp [new file with mode: 0644]
appl/recform/CppParser.cpp [new file with mode: 0644]
appl/recform/CppParser.hpp [new file with mode: 0644]
appl/recform/cuReCFormatter.cpp [new file with mode: 0644]
appl/recform/recform.pro
appl/recform/recformmain.cpp [new file with mode: 0644]
appl/recform/recformmain.hpp [new file with mode: 0644]
base/ReWriter.cpp
base/ReWriter.hpp
cunit/cuReLexer.cpp
cunit/cuReQStringUtils.cpp
cunit/cunit.pro
expr/ReLexer.cpp
expr/ReLexer.hpp
expr/ReParser.cpp
expr/ReParser.hpp
remodules.hpp

index be92045919ffe8900b9a6e8b9a0ab007251d36af..60e1e87e90f252c36740a574186116e383f4adee 100644 (file)
@@ -1,21 +1,85 @@
 /*
- * Licence:
+ * CFormatter.cpp
+ *
+ * (Un)License: Public Domain
  * You can use and modify this file without any restriction.
- * There is no warranty.
- * You also can use the licence from http://www.wtfpl.net/.
- * The original sources can be found on https://github.com/republib.
-*/
+ * Do what you want.
+ * No warranties and disclaimer of any damages.
+ * More info: http://unlicense.org
+ * The latest sources: https://github.com/republib
+ */
 
-#include "CFormatter.hpp"
+#include "recformmain.hpp"
+
+enum {
+       LOC_RESTORE_PARENTH_STACK_1 = LOC_FIRST_OF(LOC_RECFORM_CPPFORMATTER), // 20201: logged by restoreParenthLevel() on a stack underflow
+};
 
 /**
  * Constructor.
  *
- * @param args the program arguments
+ * @param args         the program arguments
+ * @param logger       the logger
+ */
+CFormatter::CFormatter(ReProgramArgs& args, ReLogger* logger) :
+       m_args(args),
+       m_blockLevel(0),
+       m_parenthLevel(0),
+       m_parenthLevelStack(),
+       m_logicalLine(),
+       m_continued(false),
+       m_lastDeclToken(-1), // -1: flush() treats no token as part of a declaration
+       m_writer(NULL), // must be set with setWriter() before flush() writes a line
+       m_logger(logger),
+       m_tabSize(4), // number of blanks per indentation level if m_useTab is false
+       m_useTab(true), // true: indent() writes tabs instead of blanks
+       m_lexer(NULL), // set by setLexer(); flush() uses it for lines with more than one token
+       m_parser(NULL)
+{
+}
+
+/**
+  * @file
+  *
+  * The formatter collects <b>logical lines</b>: this is a collection of symbols
+  * which should be written in one line.
+  * If the line length is too large, this logical line will be divided into
+  * several real lines with a higher indentation.
+  */
+
+/**
+ * Appends a token to the logical line which is currently collected.
+ *
+ * The token is copied into a <code>FormatToken</code> together with the
+ * current parenthesis level.
+ *
+ * @param token        the token to append
+ */
+void CFormatter::addToken(ReToken* token)
+{
+       FormatToken item(token, m_parenthLevel);
+       m_logicalLine.push_back(item);
+}
+
+/**
+ * Writes the current logical line to the output medium.
+ *
+ * @param isPart       <code>true</code>: the logical line may be continued
  */
-CFormatter::CFormatter(ReProgramArgs& args) :
-       m_args(args)
+void CFormatter::flush(bool isPart)
 {
+       if (m_logicalLine.size() > 0){
+               QByteArray buffer;
+               buffer.reserve(8096);
+               indent(m_blockLevel + m_continued ? 1 : 0, buffer);
+               FormatToken* lastItem = &m_logicalLine.data()[0];
+               buffer.append(lastItem->toString());
+               for (int ix = 1; ix < m_logicalLine.size(); ix++){
+                       FormatToken* item = &m_logicalLine.data()[ix];
+                       if (needsBlank(lastItem, item, ix <= m_lastDeclToken))
+                               buffer.append(' ');
+                       m_lexer->textOfToken(item, buffer);
+                       lastItem = item;
+               }
+               m_writer->writeLine(buffer);
+               m_logicalLine.clear();
+       }
+       m_continued = isPart;
 }
 
 /**
@@ -26,21 +90,7 @@ CFormatter::CFormatter(ReProgramArgs& args) :
 void CFormatter::format(const char* filename)
 {
        ReSimpleSource source(filename);
-       const char* keywords = "";
-       const char* operators = "";
-       const char* rightAssociatives = "";
-       const char* comments = "";
-       const char* firstCharsId = "a-zA-Z_";
-       const char* restCharsId = "a-zA-Z0-9_";
-       int numericTypes = ReLexer::NUMTYPE_DECIMAL | ReLexer::NUMTYPE_HEXADECIMAL
-                       | ReLexer::NUMTYPE_FLOAT;
-       int stringFeatures = ReLexer::SF_TICK | ReLexer::SF_QUOTE
-                       | ReLexer::SF_C_ESCAPING | ReLexer::SF_C_SPECIAL | ReLexer::SF_C_HEX_CHARS;
-       int storageFlags = ReLexer::STORE_COMMENT | ReLexer::STORE_ORG_STRING;
-       ReLexer lexer(&source, keywords, operators, rightAssociatives, comments,
-                                 firstCharsId, restCharsId, numericTypes, stringFeatures,
-                                 storageFlags);
-
+       CppLexer lexer(&source); // NOTE(review): the lexer is constructed but not used: nothing is parsed or written here yet
 }
 
 /**
@@ -52,3 +102,324 @@ void CFormatter::formatAll()
                format(m_args.arg(ix));
        }
 }
+
+/**
+ * Writes the indention characters (tabs or blanks) into a buffer.
+ *
+ * @param level                the level to indent
+ * @param buffer       OUT: the buffer to append
+ */
+void CFormatter::indent(int level, QByteArray& buffer){
+       int count = m_blockLevel;
+       if (m_continued)
+               count++;
+       int cols = count * m_tabSize;
+       if (! m_useTab)
+               count *= m_tabSize;
+       while(--count > 0)
+               buffer.append(m_useTab ? '\t' : ' ');
+}
+
+/**
+ * Handles a label.
+ *
+ * Not implemented yet: the body is empty.
+ *
+ * @precond    the label name is the only element in the logical line and
+ *                     the current token is the ':'
+ */
+void CFormatter::label()
+{
+
+}
+
+/**
+ * Tests whether a blank is needed between two tokens.
+ *
+ * @param first                        the first token
+ * @param second               the 2nd token
+ * @param isDeclaration        <code>true</code>: tokens inside the type area of a
+ *                                             declaration. Operators '&', '*'... are treated differently
+ * @return                             <code>true</code>: a blank is needed.
+ */
+bool CFormatter::needsBlank(ReToken* first, ReToken* second, bool isDeclaration){
+       ReTokenType type1 = first->tokenType();
+       ReTokenType type2 = second->tokenType();
+       CppOperator op;
+       bool rc = false;
+       switch(type1){
+       case TOKEN_STRING:
+       case TOKEN_NUMBER:
+       case TOKEN_REAL:
+               switch(type2){
+               case TOKEN_STRING:
+               case TOKEN_NUMBER:
+               case TOKEN_REAL:
+               case TOKEN_KEYWORD:
+               case TOKEN_ID:
+               case TOKEN_COMMENT_START:
+                       rc = true;
+                       break;
+               case TOKEN_OPERATOR:
+                       rc = needsPrecedingBlank((CppOperator) second->id());
+                       break;
+               default:
+                       rc = true;
+               }
+               break;
+       case TOKEN_KEYWORD:
+       case TOKEN_ID:
+               switch(type2){
+               case TOKEN_STRING:
+               case TOKEN_NUMBER:
+               case TOKEN_REAL:
+               case TOKEN_KEYWORD:
+               case TOKEN_ID:
+                       rc = true;
+                       break;
+               case TOKEN_OPERATOR:
+                       if (! isDeclaration)
+                               rc = needsPrecedingBlank((CppOperator) second->id());
+                       else {
+                               rc = (op = (CppOperator) second->id()) != OP_STAR
+                                        && op != OP_BIT_AND && op != OP_GT && op != OP_LT;
+                       }
+                       break;
+               default:
+                       rc = true;
+                       break;
+               }
+               break;
+       case TOKEN_OPERATOR:
+               switch(type2){
+               case TOKEN_STRING:
+               case TOKEN_NUMBER:
+               case TOKEN_REAL:
+               case TOKEN_KEYWORD:
+               case TOKEN_ID:
+                       rc = needsTrailingBlank((CppOperator) first->id());
+                       break;
+               case TOKEN_OPERATOR:
+                       if (! isDeclaration)
+                               rc = needsTrailingBlank((CppOperator) first->id())
+                                       || needsPrecedingBlank((CppOperator) second->id());
+                       else {
+                               rc = (op = (CppOperator) second->id()) != OP_GT && op != OP_LT;
+                       }
+                       break;
+               default:
+                       rc = true;
+                       break;
+               }
+               break;
+       case TOKEN_COMMENT_START:
+               rc = true;
+               break;
+       default:
+               break;
+       }
+       return rc;
+}
+
+/**
+ * Tests whether an operator needs a preceding blank.
+ *
+ * @param op   operator to test
+ * @return             <code>true</code>: a blank is needed.
+ */
+
+bool CFormatter::needsPrecedingBlank(CppOperator op)
+{
+       bool rc = true;
+       switch(op){
+       case OP_QUESTIONMARK:
+       case OP_COLON:
+       case OP_ASSIGN:
+       case OP_PLUS_ASSIGN:
+       case OP_MINUS_ASSIGN:
+       case OP_DIV_ASSIGN:
+       case OP_MOD_ASSIGN:
+       case OP_LSHIFT_ASSIGN:
+       case OP_RSHIFT_ASSIGN:
+       case OP_AND_ASSIGN:
+       case OP_XOR_ASSIGN:
+       case OP_OR_ASSIGN:
+       case OP_OR:
+       case OP_AND:
+       case OP_BIT_OR:
+       case OP_BIT_XOR:
+       case OP_BIT_AND:
+       case OP_LSHIFT:
+       case OP_RSHIFT:
+       case OP_LT:
+       case OP_LE:
+       case OP_GT:
+       case OP_GE:
+       case OP_EQ:
+       case OP_NE:
+       case OP_PLUS:
+       case OP_MINUS:
+       case OP_STAR:
+       case OP_DIV:
+       case OP_MOD:
+       case OP_NOT:
+               rc = true;
+       default:
+               break;
+       }
+       return rc;
+}
+
+/**
+ * Tests whether an operator needs a trailing blank.
+ *
+ * @param op   operator to test
+ * @return             <code>true</code>: a blank is needed.
+ */
+
+bool CFormatter::needsTrailingBlank(CppOperator op)
+{
+       bool rc = true;
+       switch(op){
+       case OP_COMMA:
+       case OP_QUESTIONMARK:
+       case OP_COLON:
+       case OP_ASSIGN:
+       case OP_PLUS_ASSIGN:
+       case OP_MINUS_ASSIGN:
+       case OP_DIV_ASSIGN:
+       case OP_MOD_ASSIGN:
+       case OP_LSHIFT_ASSIGN:
+       case OP_RSHIFT_ASSIGN:
+       case OP_AND_ASSIGN:
+       case OP_XOR_ASSIGN:
+       case OP_OR_ASSIGN:
+       case OP_OR:
+       case OP_AND:
+       case OP_BIT_OR:
+       case OP_BIT_XOR:
+       case OP_BIT_AND:
+       case OP_LSHIFT:
+       case OP_RSHIFT:
+       case OP_LT:
+       case OP_LE:
+       case OP_GT:
+       case OP_GE:
+       case OP_EQ:
+       case OP_NE:
+       case OP_PLUS:
+       case OP_MINUS:
+       case OP_STAR:
+       case OP_DIV:
+       case OP_MOD:
+       case OP_NOT:
+       default:
+               break;
+       }
+       return rc;
+}
+
+/**
+ * Returns the parser.
+ *
+ * @return     the parser set by <code>setParser()</code>,
+ *                     <code>NULL</code> if no parser has been set
+ */
+CppParser* CFormatter::parser() const
+{
+       return m_parser;
+}
+
+/**
+ * Restores the parenthesis level saved by the most recent call of
+ * <code>saveAndResetParenthLevel()</code>.
+ *
+ * If no level has been saved an error is logged and the current level
+ * remains unchanged.
+ */
+void CFormatter::restoreParenthLevel(){
+       if (m_parenthLevelStack.size() <= 0)
+               m_logger->log(LOG_ERROR, LOC_RESTORE_PARENTH_STACK_1, "stack underflow");
+       else {
+               // The levels are stored with push_back(): the most recent one is the
+               // last element. takeFirst() would deliver the oldest one (FIFO).
+               m_parenthLevel = m_parenthLevelStack.pop();
+       }
+}
+
+/**
+ * Changes the block level.
+ *
+ * @param blockLevel   0: the block level is reset to 0<br>
+ *                                             otherwise: the value is added to the current
+ *                                             block level
+ */
+void CFormatter::setBlockLevel(int blockLevel)
+{
+       m_blockLevel = blockLevel == 0 ? 0 : m_blockLevel + blockLevel;
+}
+
+/**
+ * Sets the last index of declaration tokens.
+ *
+ * <code>flush()</code> treats the tokens of the logical line up to this
+ * index as part of a declaration when it decides about blanks.
+ *
+ * @param lastDeclToken        the index to set
+ */
+void CFormatter::setLastDeclToken(int lastDeclToken)
+{
+       m_lastDeclToken = lastDeclToken;
+}
+
+/**
+ * Sets the lexer.
+ *
+ * <code>flush()</code> uses the lexer to get the text of the tokens.
+ *
+ * @param lexer        the lexer to set
+ */
+void CFormatter::setLexer(CppLexer* lexer)
+{
+       m_lexer = lexer;
+}
+
+/**
+ * Changes the parenthesis level.
+ *
+ * @param parenthLevel 0: the parenthesis level is reset to 0<br>
+ *                                             otherwise: the value is added to the current
+ *                                             parenthesis level
+ */
+void CFormatter::setParenthLevel(int parenthLevel)
+{
+       m_parenthLevel = parenthLevel == 0 ? 0 : m_parenthLevel + parenthLevel;
+}
+
+/**
+ * Sets the parser.
+ *
+ * The pointer is only stored, it can be retrieved with <code>parser()</code>.
+ *
+ * @param parser       the parser to set
+ */
+void CFormatter::setParser(CppParser* parser)
+{
+       m_parser = parser;
+}
+
+/**
+ * Store the current parenthesis level and set it to 0.
+ *
+ * The level is appended to <code>m_parenthLevelStack</code>.
+ */
+void CFormatter::saveAndResetParenthLevel(){
+       m_parenthLevelStack.push_back(m_parenthLevel);
+       m_parenthLevel = 0;
+}
+
+/**
+ * Sets the writer.
+ *
+ * <code>flush()</code> writes the lines to this writer.
+ *
+ * @param writer       the writer to set
+ */
+void CFormatter::setWriter(ReWriter* writer)
+{
+       m_writer = writer;
+}
+
+/**
+ * Returns the current writer.
+ *
+ * @return     the writer set by <code>setWriter()</code>,
+ *                     <code>NULL</code> if no writer has been set
+ */
+ReWriter* CFormatter::writer() const
+{
+       return m_writer;
+}
index 16ad68c933b837b91566792a0ec8ab73d65d9294..de993b6553964f9800e24f61636f64a9ec9ddc88 100644 (file)
@@ -1,10 +1,13 @@
 /*
- * Licence:
+ * CFormatter.hpp
+ *
+ * (Un)License: Public Domain
  * You can use and modify this file without any restriction.
- * There is no warranty.
- * You also can use the licence from http://www.wtfpl.net/.
- * The original sources can be found on https://github.com/republib.
-*/
+ * Do what you want.
+ * No warranties and disclaimer of any damages.
+ * More info: http://unlicense.org
+ * The latest sources: https://github.com/republib
+ */
 
 #ifndef CFORMATTER_HPP
 #define CFORMATTER_HPP
 #include "expr/reexpr.hpp"
 #endif
 
+/**
+ * The smallest (not separable) unit of an output line.
+ *
+ * A copy of a lexer token extended by a parenthesis level.
+ */
+class FormatToken : public ReToken {
+public:
+       /** Constructor.
+        *
+        * Creates an undefined token with the level 0.
+        */
+       FormatToken() :
+               ReToken(TOKEN_UNDEF),
+               m_level(0){
+       }
+
+       /** Constructor.
+        * @param token         the token from the lexer (will be copied)
+        * @param level         the parenthesis level
+        */
+       FormatToken(ReToken* token, int level) :
+               ReToken(*token),
+               m_level(level){
+       }
+private:
+       int m_level; // the parenthesis level; NOTE(review): stored only, there is no accessor yet
+};
+class CppParser;
+class CppLexer;
+
 class CFormatter
 {
 public:
-       CFormatter(ReProgramArgs& args);
+       CFormatter(ReProgramArgs& args, ReLogger* logger);
 public:
+       void addToken(ReToken* token);
+       void flush(bool isPart);
        void format(const char* filename);
        void formatAll();
+       void label();
+       CppParser* parser() const;
+       void restoreParenthLevel();
+       void saveAndResetParenthLevel();
+       void setBlockLevel(int blockLevel);
+       void setLastDeclToken(int lastDeclToken);
+       void setLexer(CppLexer* lexer);
+       void setParenthLevel(int parenthLevel);
+       void setParser(CppParser* parser);
+       void setWriter(ReWriter* writer);
+       ReWriter* writer() const;
+
+protected:
+       void indent(int level, QByteArray& buffer);
+       bool needsPrecedingBlank(CppOperator op);
+       bool needsTrailingBlank(CppOperator op);
+       bool needsBlank(ReToken* first, ReToken* second, bool isDeclaration);
 private:
        ReProgramArgs& m_args;
+       int m_blockLevel; // current block level, changed by setBlockLevel(); used for the indentation
+       int m_parenthLevel; // current parenthesis level, stored with each token by addToken()
+       QStack<int> m_parenthLevelStack; // the levels saved by saveAndResetParenthLevel()
+       QVector<FormatToken> m_logicalLine; // the tokens collected since the last flush()
+       /// true: a part of the logical line is already written (and removed)
+       bool m_continued;
+       /// index of the token which ends the declaration:
+       int m_lastDeclToken;
+       ReWriter* m_writer; // receives the lines built by flush()
+       ReLogger* m_logger; // used by restoreParenthLevel() to report a stack underflow
+       int m_tabSize; // number of blanks per indentation level if m_useTab is false
+       bool m_useTab; // true: indent() writes tabs, false: blanks
+       CppLexer* m_lexer; // delivers the text of the tokens in flush()
+       CppParser* m_parser; // set by setParser(), returned by parser()
 };
 
 #endif // CFORMATTER_HPP
diff --git a/appl/recform/CppDefinitions.hpp b/appl/recform/CppDefinitions.hpp
new file mode 100644 (file)
index 0000000..78ac546
--- /dev/null
@@ -0,0 +1,143 @@
+/*
+ * CppDefinitions.hpp
+ *
+ * (Un)License: Public Domain
+ * You can use and modify this file without any restriction.
+ * Do what you want.
+ * No warranties and disclaimer of any damages.
+ * More info: http://unlicense.org
+ * The latest sources: https://github.com/republib
+ */
+
+#ifndef CPPDEFINITIONS_HPP
+#define CPPDEFINITIONS_HPP
+
+enum CppKeyword { // ids of the C++ keywords: same order as the words in CppParser::m_keywords
+       K_UNDEF,
+       K_ALIGNAS,
+       K_ALIGNOF,
+       K_ASM,
+       K_AUTO,
+       K_BOOL,
+       K_BREAK,
+       K_CASE,
+       K_CATCH,
+       K_CHAR,
+       K_CHAR16_T,
+       K_CHAR32_T,
+       K_CLASS,
+       K_CONST,
+       K_CONSTEXPR,
+       K_CONST_CAST,
+       K_CONTINUE,
+       K_DECLTYPE,
+       K_DEFAULT,
+       K_DELETE,
+       K_DO,
+       K_DOUBLE,
+       K_DYNAMIC_CAST,
+       K_ELSE,
+       K_ENUM,
+       K_EXPLICIT,
+       K_EXPORT,
+       K_EXTERN,
+       K_FALSE,
+       K_FLOAT,
+       K_FOR,
+       K_FRIEND,
+       K_GOTO,
+       K_IF,
+       K_INLINE,
+       K_INT,
+       K_LONG,
+       K_MUTABLE,
+       K_NAMESPACE,
+       K_NEW,
+       K_NOEXCEPT,
+       K_NULLPTR,
+       K_OPERATOR,
+       K_PRIVATE,
+       K_PROTECTED ,
+       K_PUBLIC,
+       K_REGISTER,
+       K_REINTERPRET_CAST,
+       K_RETURN,
+       K_SHORT,
+       K_SIGNED,
+       K_SIZEOF,
+       K_STATIC,
+       K_STATIC_ASSERT,
+       K_STATIC_CAST,
+       K_STRUCT,
+       K_SWITCH,
+       K_TEMPLATE,
+       K_THIS,
+       K_THREAD_LOCAL,
+       K_THROW,
+       K_TRUE,
+       K_TRY,
+       K_TYPEDEF,
+       K_TYPEID,
+       K_TYPENAME,
+       K_UNION,
+       K_UNSIGNED,
+       K_USING,
+       K_VIRTUAL,
+       K_VOID,
+       K_VOLATILE,
+       K_WCHAR_T,
+       K_WHILE
+ };
+enum CppOperator{ // ids of the C++ operators: same order as the operators in CppParser::m_operators
+       OP_UNDEF,
+       OP_COMMA,
+       OP_QUESTIONMARK,
+       OP_COLON,
+       OP_ASSIGN, // first assignment operator: CppParser::isAssignment() tests the range OP_ASSIGN..OP_OR_ASSIGN
+       OP_PLUS_ASSIGN,
+       OP_MINUS_ASSIGN,
+       OP_DIV_ASSIGN,
+       OP_MOD_ASSIGN,
+       OP_LSHIFT_ASSIGN,
+       OP_RSHIFT_ASSIGN,
+       OP_AND_ASSIGN,
+       OP_XOR_ASSIGN,
+       OP_OR_ASSIGN, // last assignment operator
+       OP_OR,
+       OP_AND,
+       OP_BIT_OR,
+       OP_BIT_XOR,
+       OP_BIT_AND,
+       OP_LSHIFT,
+       OP_RSHIFT,
+       OP_LT,
+       OP_LE,
+       OP_GT,
+       OP_GE,
+       OP_EQ,
+       OP_NE,
+       OP_PLUS,
+       OP_MINUS,
+       OP_STAR,
+       OP_DIV,
+       OP_MOD,
+       OP_DOT_STAR,
+       OP_ARROW_STAR,
+       OP_PLUS_PLUS,
+       OP_MINUS_MINUS,
+       OP_NOT,
+       OP_BIT_NOT,
+       OP_LPARENTH,
+       OP_RPARENTH,
+       OP_LBRACKET,
+       OP_RBRACKET,
+       OP_DOT,
+       OP_ARROW,
+       OP_BELONGS, // the operator "::"
+       OP_LBRACE,
+       OP_RBRACE,
+       OP_SEMICOLON,
+       OP_COUNT // not an operator: follows the last operator id
+};
+
+#endif // CPPDEFINITIONS_HPP
diff --git a/appl/recform/CppParser.cpp b/appl/recform/CppParser.cpp
new file mode 100644 (file)
index 0000000..5eca9ab
--- /dev/null
@@ -0,0 +1,355 @@
+/*
+ * CppParser.cpp
+ *
+ * (Un)License: Public Domain
+ * You can use and modify this file without any restriction.
+ * Do what you want.
+ * No warranties and disclaimer of any damages.
+ * More info: http://unlicense.org
+ * The latest sources: https://github.com/republib
+ */
+
+#include "recformmain.hpp"
+
+enum {
+       LOC_FORMATASTREE_1 = LOC_FIRST_OF(LOC_RECFORM_CPPPARSER), // 20101
+       LOC_BODY_1,             // 20102
+       LOC_BODY_2,             // 20103
+       LOC_STARTS_WITH_ID_1,   // 20104
+       LOC_BODY_3,                             // 20105
+};
+
+const char* CppParser::m_keywords = "alignas alignof asm auto bool break case catch "
+               "char char16_t char32_t class const constexpr const_cast continue "
+               "decltype default delete do double dynamic_cast else enum explicit "
+               "export extern false float for friend goto if inline int long mutable "
+               "namespace new noexcept nullptr operator private protected public "
+               "register reinterpret_cast return short signed sizeof static "
+               "static_assert static_cast struct switch template this thread_local "
+               "throw true try typedef typeid typename union unsigned using virtual "
+               "void volatile wchar_t while"; // same order as the enumerators of CppKeyword (after K_UNDEF)
+const char* CppParser::m_operators = ",\n? : = += -= /= %= <<= >>= &= ^= |=\n"
+               "||\n&&\n|\n^\n&\n<< >>\n< <= > >=\n== !=\n+ -\n* / %\n.* ->*\n"
+               "++ -- ! ~\n( ) [ ] . ->\n::\n{ } ;"; // same order as CppOperator (after OP_UNDEF). NOTE(review): "*=" is missing here and in the enum -- confirm
+const char* CppParser::m_rightAssociatives = "? : = += -= /= %= <<= >>= &= ^= |=";
+const char* CppParser::m_comments = "/* */ // \n"; // presumably pairs of comment start and comment end -- TODO confirm against ReLexer
+const char* CppParser::m_firstCharsId = "a-zA-Z_";
+const char* CppParser::m_restCharsId = "a-zA-Z0-9_";
+
+/**
+ * Constructor.
+ *
+ * Registers the lexer and this parser in the formatter.
+ *
+ * @param lexer        the tokenizer delivering the syntax elements
+ * @param formatter    the formatter info
+ */
+CppParser::CppParser(CppLexer& lexer, CFormatter& formatter) :
+       ReParser(lexer),
+       m_formatter(formatter),
+       m_newlinePreceds(false)
+{
+       m_formatter.setLexer(&lexer);
+       m_formatter.setParser(this);
+}
+
+/**
+ * Handles a sequence of blanks.
+ *
+ * Find out whether a newline is in the blank sequence.
+ *
+ * @param token        the token with the first blank
+ */
+void CppParser::blank(ReToken* token)
+{
+       m_newlinePreceds = token->toString().indexOf('\n') >= 0;
+       while( (token = m_lexer.nextToken()) != NULL && token->tokenType() == TOKEN_SPACE)
+               if (token->toString().indexOf('\n') >= 0)
+                       m_newlinePreceds = true;
+       if (token->tokenType() != TOKEN_END_OF_SOURCE)
+               m_lexer.undoLastToken();
+}
+
+/**
+ * Parses the current file.
+ *
+ * Processes the source as a body of the type <code>BT_PROG</code>.
+ */
+void CppParser::parse()
+{
+       body(BT_PROG);
+}
+
+/**
+ * Handle a declaration or an expression or a label.
+ *
+ * @param token        the first token of the logical line
+ */
+void CppParser::nonStatement(ReToken* token)
+{
+       m_formatter.addToken(token);
+       int count = 0;
+       bool again = true;
+       ReTokenType tokenType;
+       bool isDecl = true;
+       m_formatter.saveAndResetParenthLevel();
+       while(again) {
+               count++;
+               token = m_lexer.nextToken();
+               tokenType = token->tokenType();
+               if (count == 2 && token->isOperator(OP_COLON))
+                       m_formatter.label();
+               else {
+                       m_formatter.addToken(token);
+
+                       if (tokenType == TOKEN_OPERATOR) {
+                               if (isAssignment((CppOperator) token->id()))
+                               isDecl = false;
+                               else if (token->isOperator(OP_LPARENTH, OP_LBRACKET))
+                                       m_formatter.setParenthLevel(+1);
+                               else if (token->isOperator(OP_RPARENTH, OP_RBRACKET))
+                                       m_formatter.setParenthLevel(-1);
+                       }
+
+               }
+
+       }
+       m_formatter.restoreParenthLevel();
+}
+
+/**
+ * Handles a statement.
+ *
+ * Not implemented yet: the body is empty.
+ *
+ * @param token        the first token of the statement
+ */
+void CppParser::statement(ReToken* token)
+{
+}
+/**
+ * Handles a class/struct/union header.
+ *
+ * Not implemented yet: the body is empty.
+ *
+ * @param type TOKEN_CLASS, TOKEN_STRUCT or TOKEN_UNION
+ */
+void CppParser::classHeader(ReTokenType type)
+{
+
+}
+
+/**
+ * Handles a declaration.
+ *
+ * Not implemented yet: the body is empty.
+ *
+ * @param keyword      the intro of the declaration
+ */
+void CppParser::declaration(CppKeyword keyword)
+{
+
+}
+
+/**
+ * Handles an expression statement.
+ *
+ * This is a "one logical line" statement, e.g. an assignment.
+ * All tokens up to and including the next ';' are added to the logical line,
+ * then the line is written.
+ *
+ * @param token        the first token
+ */
+void CppParser::exprStatement(ReToken* token)
+{
+       // Stop at the end of the source too: otherwise a missing ';' would
+       // result in an endless loop.
+       while (token != NULL && token->tokenType() != TOKEN_END_OF_SOURCE) {
+               m_formatter.addToken(token);
+               if (token->isOperator(OP_SEMICOLON))
+                       break;
+               token = m_lexer.nextToken();
+       }
+       m_formatter.flush(false);
+}
+
+
+/**
+ * Returns whether a keyword is a the start of a declaration.
+ *
+ * @param keyword      the keyword to inspect
+ * @return                     <code>true</code>: the token is a start of a declaration
+ */
+bool CppParser::isDeclarationIntro(CppKeyword keyword){
+       bool rc = false;
+       switch(keyword){
+       case K_STATIC:
+       case K_AUTO:
+       case K_BOOL:
+       case K_INT:
+       case K_CHAR:
+       case K_CHAR16_T:
+       case K_CHAR32_T:
+       case K_CONST:
+       case K_FLOAT:
+       case K_DOUBLE:
+       case K_LONG:
+       case K_REGISTER:
+       case K_SIGNED:
+       case K_EXPORT:
+       case K_EXTERN:
+       case K_UNSIGNED:
+       case K_VOLATILE:
+       case K_VIRTUAL:
+       case K_VOID:
+               rc = true;
+               break;
+       default:
+               break;
+       }
+       return rc;
+}
+/**
+ * Tests whether an operator is an assignment operator.
+ *
+ * The assignment operators are the contiguous range
+ * <code>OP_ASSIGN</code> .. <code>OP_OR_ASSIGN</code> of <code>CppOperator</code>.
+ *
+ * @param op   the operator to inspect
+ * @return             <code>true</code>: <code>op</code> is an assignment operator
+ */
+bool CppParser::isAssignment(CppOperator op){
+       return ! (op < OP_ASSIGN || op > OP_OR_ASSIGN);
+}
+
+/**
+ * Returns whether a keyword is a the start of a declaration.
+ *
+ * @param keyword      keyword to inspect
+ * @return                     <code>true</code>: the token is a start of a declaration
+ */
+bool CppParser::isStatement(CppKeyword keyword){
+       bool rc = false;
+       switch(keyword){
+       case K_DO:
+       case K_FOR:
+       case K_GOTO:
+       case K_IF:
+       case K_SWITCH:
+       case K_TRY:
+       case K_THROW:
+       case K_WHILE:
+       case K_RETURN:
+       case K_USING:
+               rc = true;
+               break;
+       default:
+               break;
+       }
+       return rc;
+}
+
+/**
+ * Parses a body.
+ *
+ * Reads tokens until the end of the source and dispatches each token
+ * by its token type.
+ *
+ * @param bodyType     the kind of the body: a string or a number is reported as
+ *                                     syntax error in <code>BT_CLASS</code> and
+ *                                     <code>BT_PROG</code>
+ */
+void CppParser::body(BodyType bodyType){
+       ReToken* token;
+       ReTokenType type;
+       CppKeyword keyword;
+       while ( (token = m_lexer.nextToken()) != NULL
+                       && (type = token->tokenType()) != TOKEN_END_OF_SOURCE){
+               switch(token->tokenType()){
+               case TOKEN_STRING:
+               case TOKEN_NUMBER:
+               case TOKEN_REAL:
+                       if (bodyType == BT_CLASS || bodyType == BT_PROG)
+                               syntaxError(LOC_BODY_1, "unexpected token");
+                       exprStatement(token); // NOTE(review): also reached after the error above if syntaxError() returns
+                       break;
+               case TOKEN_OPERATOR:
+                       if (token->isOperator(OP_RBRACE))
+                               break; // NOTE(review): leaves the switch only, not the while loop -- confirm whether '}' should end the body
+                       else
+                               syntaxError(LOC_BODY_3, "unexpected token");
+                       break;
+               case TOKEN_ID:
+                       nonStatement(token);
+                       break;
+               case TOKEN_KEYWORD:
+                       keyword = (CppKeyword) token->id();
+                       if (isStatement(keyword))
+                               statement(token);
+                       else
+                               nonStatement(token);
+                       break;
+               case TOKEN_COMMENT_REST_OF_LINE:
+               case TOKEN_COMMENT_START:
+               case TOKEN_COMMENT_END:
+                       comment(token);
+                       break;
+               case TOKEN_SPACE:
+                       blank(token);
+                       break;
+               case TOKEN_END_OF_SOURCE: // not reachable: excluded by the loop condition
+                       break;
+               case TOKEN_UNDEF:
+               default:
+                       syntaxError(LOC_BODY_2, "unexpected token");
+                       break;
+               }
+       }
+}
+
+/**
+ * Handles a comment.
+ *
+ * Intended behaviour: reads the comment and writes it formatted.
+ * Not implemented yet: the body is empty.
+ *
+ * @param token        the comment start
+ */
+void CppParser::comment(ReToken* token)
+{
+
+}
+
+/**
+ * Constructor.
+ *
+ * Initializes the base lexer with the keyword, operator, comment and
+ * identifier definitions of <code>CppParser</code>. The preview mode is off
+ * and the token queue is empty.
+ *
+ * @param source       the input source
+ */
+CppLexer::CppLexer(ReSource* source) :
+       ReLexer(source, CppParser::m_keywords, CppParser::m_operators,
+                       CppParser::m_rightAssociatives, CppParser::m_comments,
+                       CppParser::m_firstCharsId, CppParser::m_restCharsId,
+                       ReLexer::NUMTYPE_DECIMAL | ReLexer::NUMTYPE_HEXADECIMAL
+                               | ReLexer::NUMTYPE_FLOAT,
+                       ReLexer::SF_TICK | ReLexer::SF_QUOTE | ReLexer::SF_C_ESCAPING
+                       | ReLexer::SF_C_SPECIAL | ReLexer::SF_C_HEX_CHARS,
+                       ReLexer::STORE_ALL),
+       m_preview(false),
+       m_queue(),
+       m_indexQueue(0)
+{
+       setStoreAll(true); // NOTE(review): STORE_ALL is already passed to the base constructor -- confirm that this call is still needed
+}
+
+/**
+ * Returns the next token.
+ *
+ * @return the next token
+ */
+ReToken*CppLexer::nextToken()
+{
+       ReToken* token;
+       if (m_preview || m_queue.size() == 0){
+               token = ReLexer::nextToken();
+               if (m_preview)
+                       m_queue.append(*token);
+       } else {
+               if (++m_indexQueue < m_queue.size()){
+                       token = &m_queue.data()[m_indexQueue];
+               } else {
+                       m_queue.clear();
+                       token = ReLexer::nextToken();
+               }
+       }
+       return token;
+}
+
+/**
+ * Switches the preview mode on or off.
+ *
+ * In the "preview mode" the delivered tokens are stored in a list too.
+ * After the mode is switched off <code>nextToken()</code> delivers the stored
+ * tokens again, starting with the first one.
+ *
+ * @param preview      <code>true</code>: the list is cleared and the following
+ *                                     tokens will be stored additionally<br>
+ *                                     <code>false</code>: the next token will be the first
+ *                                     of the list
+ */
+void CppLexer::setPreview(bool preview)
+{
+       // nextToken() increments the index before the access:
+       m_indexQueue = -1;
+       if (preview)
+               m_queue.clear();
+       m_preview = preview;
+}
diff --git a/appl/recform/CppParser.hpp b/appl/recform/CppParser.hpp
new file mode 100644 (file)
index 0000000..a215937
--- /dev/null
@@ -0,0 +1,78 @@
+/*
+ * CppParser.hpp
+ *
+ * (Un)License: Public Domain
+ * You can use and modify this file without any restriction.
+ * Do what you want.
+ * No warranties and disclaimer of any damages.
+ * More info: http://unlicense.org
+ * The latest sources: https://github.com/republib
+ */
+
+#ifndef CPPPARSER_HPP
+#define CPPPARSER_HPP
+
+enum CppSymbol { // NOTE(review): not referenced by the sources of this commit shown here -- confirm the intended use
+       CPP_UNDEF,
+       CPP_ClassIntro,
+       CPP_ClassBody,
+       CPP_FuncIntro,
+       CPP_FuncBody,
+};
+
+/**
+ * A lexical token analyser for C++ syntax.
+ *
+ * Extends the base lexer by a "preview mode": the tokens delivered in this
+ * mode are stored and delivered again after the mode is switched off.
+ */
+class CppLexer : public ReLexer{
+public:
+       CppLexer(ReSource* source);
+public:
+       virtual ReToken* nextToken();
+       void setPreview(bool preview);
+private:
+       bool m_preview; // true: nextToken() stores a copy of each delivered token in m_queue
+       QVector<ReToken> m_queue; // the tokens stored in the preview mode
+       int m_indexQueue; // index of the last token delivered from m_queue; -1 after setPreview()
+};
+
+class CFormatter;
+/**
+ * A parser for C++.
+ *
+ * Reads the tokens from a <code>CppLexer</code> and passes them to a
+ * <code>CFormatter</code>.
+ */
+class CppParser : public ReParser
+{
+public:
+       enum BodyType {
+               BT_PROG, BT_CLASS, BT_METHOD, BT_BLOCK
+       };
+
+public:
+       CppParser(CppLexer& lexer, CFormatter& formatter);
+public:
+       void blank(ReToken* token);
+       void body(BodyType type);
+       void classHeader(ReTokenType type);
+       void comment(ReToken* token);
+       void declaration(CppKeyword keyword);
+       void exprStatement(ReToken* token);
+       void parse();
+       void nonStatement(ReToken* token);
+       void statement(ReToken* token);
+protected:
+       void addToken(ReToken* token); // NOTE(review): no definition in CppParser.cpp of this commit -- confirm whether it is still needed
+       bool isDeclarationIntro(CppKeyword keyword);
+       bool isAssignment(CppOperator op);
+       bool isStatement(CppKeyword keyword);
+private:
+       CFormatter& m_formatter; // receives the tokens of the logical lines
+       bool m_newlinePreceds; // true: the last blank sequence handled by blank() contained a newline
+public:
+       static const char* m_keywords; // the lexer definitions below are passed to ReLexer by the CppLexer constructor
+       static const char* m_operators;
+       static const char* m_rightAssociatives;
+       static const char* m_comments;
+       static const char* m_firstCharsId;
+       static const char* m_restCharsId;
+};
+
+#endif // CPPPARSER_HPP
diff --git a/appl/recform/cuReCFormatter.cpp b/appl/recform/cuReCFormatter.cpp
new file mode 100644 (file)
index 0000000..836a9bc
--- /dev/null
@@ -0,0 +1,95 @@
+/*
+ * cuReCFormatter.cpp
+ *
+ * (Un)License: Public Domain
+ * You can use and modify this file without any restriction.
+ * Do what you want.
+ * No warranties and disclaimer of any damages.
+ * More info: http://unlicense.org
+ * The latest sources: https://github.com/republib
+ */
+#include "recformmain.hpp"
+/**
+ * @brief Unit test of CFormatter and CppLexer.
+ *
+ * The constructor connects a string reader, a string writer, the formatter,
+ * the lexer and the parser and then calls doIt().
+ */
+class TestCppFormatter: public ReTest {
+public:
+       /**
+        * Constructor.
+        *
+        * Builds the test environment and calls doIt().
+        */
+       TestCppFormatter() :
+               ReTest("CppFormatter"),
+               m_args(""),
+               m_source(),
+               m_reader(m_source),
+               m_writer(),
+               m_formatter(m_args, &m_logger),
+               m_lexer(&m_source),
+               m_parser(m_lexer, m_formatter)
+       {
+               m_reader.addSource("std", "");
+               m_source.addReader(&m_reader);
+               m_formatter.setWriter(&m_writer);
+               doIt();
+       }
+
+public:
+       /**
+        * Replaces the content of the source "std" and hands all non space
+        * tokens of it to the formatter.
+        *
+        * The loop stops after the end-of-source token has been added too.
+        *
+        * @param input the C++ text to tokenize
+        */
+       void setTokens(const char* input){
+               m_reader.replaceSource("std", input);
+               ReToken* token;
+               do {
+                       token = m_lexer.nextNonSpaceToken();
+                       m_formatter.addToken(token);
+               } while (! token->isTokenType(TOKEN_END_OF_SOURCE));
+       }
+
+       /**
+        * Checks that an already well formatted line is written unchanged.
+        */
+       void testBasic() {
+               const char* line = "const char* ptr = (const char*) abc(1 + 2) * 3 / 4;";
+               setTokens(line);
+               m_writer.buffer().clear();
+               m_formatter.setLastDeclToken(4);
+               m_formatter.flush(true);
+               checkEqu(line, m_writer.buffer());
+       }
+
+       /**
+        * Checks the preview mode of CppLexer: after reading all tokens in
+        * preview mode the same tokens must be delivered again.
+        */
+       void testLexer(){
+               m_reader.replaceSource("std", "const int x = 3;");
+               CppLexer lexer(&m_source);
+               lexer.setPreview(true);
+               ReToken* token;
+               do {
+                       token = lexer.nextToken();
+               } while (token->tokenType() != TOKEN_END_OF_SOURCE);
+               lexer.setPreview(false);
+               token = lexer.nextToken();
+               checkEqu(TOKEN_KEYWORD, token->tokenType());
+               checkEqu(K_CONST, token->id());
+
+               token = lexer.nextNonSpaceToken();
+               checkEqu(TOKEN_KEYWORD, token->tokenType());
+               checkEqu(K_INT, token->id());
+               token = lexer.nextNonSpaceToken();
+               checkEqu(TOKEN_ID, token->tokenType());
+               checkEqu("x", token->toString());
+               token = lexer.nextNonSpaceToken();
+               checkT(token->isOperator(OP_ASSIGN));
+               token = lexer.nextNonSpaceToken();
+               // isInteger() sets the value only on success: the initialization
+               // avoids reading an indeterminate value in the check below
+               int value = 0;
+               checkT(token->isInteger(&value));
+               checkEqu(3, value);
+       }
+
+       /**
+        * Runs all tests of this class.
+        */
+       virtual void runTests() {
+               testBasic();
+               testLexer();
+       }
+       // Note: the order of the members is the order of the initialization
+       ReProgramArgs m_args;
+       ReSource m_source;
+       ReStringReader m_reader;
+       ReStringWriter m_writer;
+       CFormatter m_formatter;
+       CppLexer m_lexer;
+       CppParser m_parser;
+};
+
+/**
+ * Runs the unit tests of the formatter.
+ *
+ * Creating the instance is enough: the constructor of TestCppFormatter
+ * calls doIt().
+ */
+void testCppFormatter() {
+       TestCppFormatter test;
+}
+
index 321298e8cefc12462074e0c035e8bde34a0c603f..726a2330ac2c076692e7de5e5e5900a2bf011f81 100644 (file)
@@ -16,21 +16,27 @@ INCLUDEPATH = ../..
 
 SOURCES += recformmain.cpp \
         ../../base/ReException.cpp \
+        ../../base/ReByteStorage.cpp \
         ../../base/ReStringUtils.cpp \
         ../../base/ReQStringUtils.cpp \
         ../../base/ReFileUtils.cpp \
         ../../base/ReProgramArgs.cpp \
         ../../base/ReCharPtrMap.cpp \
+        ../../base/ReWriter.cpp \
         ../../base/ReLogger.cpp \
+        ../../base/ReTest.cpp \
         ../../expr/ReSource.cpp \
         ../../expr/ReLexer.cpp \
         ../../expr/ReParser.cpp \
        CFormatter.cpp \
-       CppParser.cpp
+       CppParser.cpp \
+       cuReCFormatter.cpp
 
 HEADERS  += recformmain.hpp \
         ../../base/rebase.hpp \
+        ../../expr/reexpr.hpp \
        CFormatter.hpp \
-       cppparser.hpp
+       CppParser.hpp \
+       CppDefinitions.hpp
 
 CODECFORSRC = UTF-8
diff --git a/appl/recform/recformmain.cpp b/appl/recform/recformmain.cpp
new file mode 100644 (file)
index 0000000..6b58655
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * recformmain.cpp
+ *
+ * (Un)License: Public Domain
+ * You can use and modify this file without any restriction.
+ * Do what you want.
+ * No warranties and disclaimer of any damages.
+ * More info: http://unlicense.org
+ * The latest sources: https://github.com/republib
+ */
+
+#include <QCoreApplication>
+#include "recformmain.hpp"
+
+/**
+ * Empty placeholder.
+ *
+ * NOTE(review): not called from main() in this file; remove it if it is not
+ * used elsewhere -- confirm.
+ */
+void test(){
+
+}
+
+/**
+ * Entry point of the C++ source formatter.
+ *
+ * If the only argument is "test" the unit tests are executed.
+ * Otherwise the formatter is started.
+ *
+ * @param argc the number of program arguments
+ * @param argv the program arguments
+ * @return             the exit code of the program
+ */
+int main(int argc, char *argv[])
+{
+       QCoreApplication a(argc, argv);
+
+       ReProgramArgs args("$0 <opts> <source1> [ <source2> ...]\n"
+                                          "Formats c++ sources", "$0 -v");
+       args.addBool("verbose", "logs additional information", 'v', "verbose", false);
+       args.init(argc, argv);
+       // The unit tests run on request only: an unconditional "true ||" in
+       // this condition would make the formatting below unreachable.
+       if (args.argCount() == 1 && strcmp(args.arg(0), "test") == 0){
+               extern void testCppFormatter();
+               testCppFormatter();
+               return 0;
+       }
+       ReLogger logger;
+       logger.buildStandardAppender("recform");
+       CFormatter formatter(args, &logger);
+       ReSource source;
+       CppLexer lexer(&source);
+       CppParser parser(lexer, formatter);
+       formatter.formatAll();
+       // NOTE(review): a.exec() enters the Qt event loop and nothing visible
+       // here quits it -- confirm that this is intended for a console tool.
+       return a.exec();
+}
diff --git a/appl/recform/recformmain.hpp b/appl/recform/recformmain.hpp
new file mode 100644 (file)
index 0000000..bdcbe98
--- /dev/null
@@ -0,0 +1,18 @@
+/*
+ * recformmain.hpp
+ *
+ * (Un)License: Public Domain
+ * You can use and modify this file without any restriction.
+ * Do what you want.
+ * No warranties and disclaimer of any damages.
+ * More info: http://unlicense.org
+ * The latest sources: https://github.com/republib
+ */
+#ifndef RECFORMMAIN_HPP
+#define RECFORMMAIN_HPP
+#include "base/rebase.hpp"
+#include "expr/reexpr.hpp"
+#include "CppDefinitions.hpp"
+#include "CFormatter.hpp"
+#include "CppParser.hpp"
+#endif
index f163bb75b55dfc418d59b3ad926079b482132806..1fdeac8d0e2ba8399ba1297950efd22d4d0d5dfa 100644 (file)
@@ -22,7 +22,7 @@
 #include "base/rebase.hpp"
 
 const char* ReWriter::m_tabs =
-    "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t";
+       "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t";
 int ReWriter::m_maxIndention = strlen(ReWriter::m_tabs);
 
 /** @class ReWriter ReWriter.hpp "base/ReWriter.hpp"
@@ -145,11 +145,11 @@ void ReWriter::formatIndented(int indent, const char* format, ...) {
  * @param eoln              line end: "\n" or "\r\n"
  */
 ReFileWriter::ReFileWriter(const char* filename, const char* mode,
-    FILE* additionalStream, const char* eoln) :
-           m_fp(fopen(filename, mode)),
-           m_name(filename),
-           m_eoln(eoln),
-           m_additionalStream(additionalStream) {
+       FILE* additionalStream, const char* eoln) :
+               m_fp(fopen(filename, mode)),
+               m_name(filename),
+               m_eoln(eoln),
+               m_additionalStream(additionalStream) {
 }
 
 /**
@@ -190,3 +190,56 @@ void ReFileWriter::close() {
        }
        m_additionalStream = NULL;
 }
+
+/**
+ * Constructor.
+ *
+ * The internal buffer is empty at the beginning.
+ */
+ReStringWriter::ReStringWriter() :
+       m_buffer()
+{
+}
+
+/**
+ * Destructor.
+ *
+ * Nothing to do explicitly: the buffer is a member released automatically.
+ */
+ReStringWriter::~ReStringWriter()
+{
+
+}
+
+/**
+ * Returns a reference to the internal buffer.
+ *
+ * The reference is not constant: the caller may modify the buffer,
+ * e.g. clear it.
+ *
+ * @return the internal buffer
+ */
+QByteArray& ReStringWriter::buffer()
+{
+       return m_buffer;
+}
+/**
+ * Closes the writer.
+ *
+ * This implementation does nothing: the buffer remains unchanged.
+ */
+void ReStringWriter::close()
+{
+       // nothing to do
+}
+
+/**
+ * Appends a string to the internal buffer.
+ *
+ * No line end is added.
+ *
+ * @param content      the text to append
+ */
+void ReStringWriter::write(const char* content)
+{
+       m_buffer += content;
+}
+
+/**
+ * Appends a line followed by a newline character to the internal buffer.
+ *
+ * @param line the text of the line (without the trailing newline)
+ */
+void ReStringWriter::writeLine(const char* line)
+{
+       m_buffer += line;
+       m_buffer += '\n';
+}
index 49d0f712ca3b8120646cc751be572cc363624428..337c35d06df12539c259c4f15444400f2ffb1c67 100644 (file)
@@ -41,10 +41,13 @@ protected:
        static int m_maxIndention;
 };
 
+/**
+ * Writes into a file.
+ */
 class ReFileWriter: public ReWriter {
 public:
        ReFileWriter(const char* filename, const char* mode = "w",
-           FILE* additionalStream = NULL, const char* eoln = "\n");
+               FILE* additionalStream = NULL, const char* eoln = "\n");
 public:
        virtual void write(const char* line);
        virtual void writeLine(const char* line = NULL);
@@ -56,4 +59,20 @@ protected:
        FILE* m_additionalStream;
 };
 
+/**
+ * Allows writing into a string buffer.
+ *
+ * The written data is collected in a QByteArray which can be accessed
+ * with buffer().
+ */
+class ReStringWriter : public ReWriter {
+public:
+       ReStringWriter();
+       virtual ~ReStringWriter();
+public:
+       // access to the collected data (modifiable)
+       QByteArray& buffer();
+       virtual void close();
+       // appends the text without a line end
+       virtual void write(const char* content);
+       // appends the text and a newline character
+       virtual void writeLine(const char* line = NULL);
+protected:
+       // collects all written data
+       QByteArray m_buffer;
+};
+
 #endif // REWRITER_HPP
index 3f330c6e1c66a3addd95cb1520fb7452b1b673d6..19c73d49a2cebc60d8ac6beae551f16478dc86b8 100644 (file)
@@ -52,7 +52,7 @@ public:
                checkEqu(0.25, asReal());
        }
 
-       ReToken* checkToken(ReToken* token, RplTokenType type, int id = 0,
+       ReToken* checkToken(ReToken* token, ReTokenType type, int id = 0,
                const char* string = NULL) {
                checkEqu(type, token->tokenType());
                if (id != 0)
index 34e1bd0d507632d00cd52342c7128507c84a98bd..f32ae4beaa68c7634c8262310fae96a4bbb9b5d9 100644 (file)
@@ -33,7 +33,7 @@ public:
        }
 
        void testLengthOfUInt64() {
-               quint64 value = -3;
+               uint64_t value = -3;
                checkEqu(1,
                        ReQStringUtils::lengthOfUInt64(ReString("0"), 0, 10, &value));
                checkEqu(int64_t(0), value);
index 181449cdc55ed289bc3f0de519f84ec4e4a64f9d..7cb6f61c2510e7beffa2a3d92126b32d46bb0a0e 100644 (file)
@@ -7,6 +7,8 @@
 QT       += core network gui
 greaterThan(QT_MAJOR_VERSION, 4): QT += widgets
 
+CONFIG += c++11
+
 TARGET = cunit
 CONFIG   += console
 CONFIG   -= app_bundle
index 9ffa4aafdf1d7187d91f82d0618cfda98bf4c560..fa12991fbdc5e51ba963fb81493ab81c8fd079df 100644 (file)
@@ -50,15 +50,28 @@ ReLexException::ReLexException(const ReSourcePosition& position,
  * @brief Implements a token which is the smallest unit for a parser.
  *
  */
+
 /**
  * @brief Constructor.
  * @param type  token type
  */
-ReToken::ReToken(RplTokenType type) :
-               m_tokenType(type),
-               m_string(),
-               m_printableString()
-// m_value
+ReToken::ReToken() :
+       m_tokenType(TOKEN_UNDEF),
+       m_string(),
+       m_printableString(),
+       m_value()
+{
+       // default constructor (no parameter): the type is TOKEN_UNDEF,
+       // the strings are empty and all bytes of the value are set to 0
+       memset(&m_value, 0, sizeof m_value);
+}
+/**
+ * @brief Constructor.
+ * @param type  token type
+ */
+ReToken::ReToken(ReTokenType type) :
+       m_tokenType(type),
+       m_string(),
+       m_printableString(),
+       m_value()
 {
        memset(&m_value, 0, sizeof m_value);
 }
@@ -88,6 +101,7 @@ ReToken::ReToken(const ReToken& source) :
 ReToken& ReToken::operator =(const ReToken& source) {
        m_tokenType = source.m_tokenType;
        m_string = source.m_string;
+       m_printableString = source.m_printableString;
        m_value = source.m_value;
        return *this;
 }
@@ -100,6 +114,14 @@ const QByteArray& ReToken::toString() {
        return m_string;
 }
 
+/**
+ * @brief Returns the string stored in the token.
+ *
+ * Delivers the same member as toString() but can be called for constant
+ * instances.
+ *
+ * @return the string of the token (<code>m_string</code>)
+ */
+const QByteArray& ReToken::string() const{
+       return m_string;
+}
+
 /**
  * @brief Returns the integer value of the token
  *
@@ -159,7 +181,7 @@ int ReToken::id() const {
  * @brief Returns the token type.
  * @return the token type
  */
-RplTokenType ReToken::tokenType() const {
+ReTokenType ReToken::tokenType() const {
        return m_tokenType;
 }
 
@@ -171,7 +193,7 @@ RplTokenType ReToken::tokenType() const {
  * @return  true: the expected type is the current<br>
  *          false: otherwise
  */
-bool ReToken::isTokenType(RplTokenType expected) const {
+bool ReToken::isTokenType(ReTokenType expected) const {
        return m_tokenType == expected;
 }
 
@@ -226,6 +248,20 @@ bool ReToken::isCapitalizedId() const {
        return rc;
 }
 
+/**
+ * Tests whether the token is a number and delivers its integer value.
+ *
+ * @param value        OUT: NULL or the integer value of the token.
+ *                             Not changed if the result is <code>false</code>
+ * @return             <code>true</code>: the token type is TOKEN_NUMBER<br>
+ *                             <code>false</code>: otherwise
+ */
+bool ReToken::isInteger(int* value)
+{
+       if (m_tokenType != TOKEN_NUMBER)
+               return false;
+       if (value != NULL)
+               *value = m_value.m_integer;
+       return true;
+}
+
 /**
  * @brief Returns the description of the current token.
  *
@@ -277,7 +313,7 @@ QByteArray ReToken::asUtf8() const {
  * @param   type  the type to convert
  * @return  the token type name
  */
-const char* ReToken::nameOfType(RplTokenType type) {
+const char* ReToken::nameOfType(ReTokenType type) {
        const char* rc = "?";
 
        switch (type) {
@@ -332,6 +368,20 @@ const char* ReToken::nameOfType(RplTokenType type) {
  *
  */
 
+/**
+ * Stores items in a vector.
+ *
+ * Builds also a character table with flags for the 1st, 2nd, 3rd or rest
+ * characters of the items.
+ *
+ * @param items                                a string with the items delimited by ' '
+ * @param vector                       OUT: the vector with the items
+ * @param firstCharFlag                a flag set for the first character of any item
+ * @param secondCharFlag       a flag set for the second character of any item
+ * @param thirdCharFlag                a flag set for the 3rd character of any item
+ * @param restCharFlag         a flag set for the rest characters of any items
+ * @param charInfo                     OUT: info about all characters: bitmap of the 4 flags
+ */
 static void itemsToVector(const char* items, ReLexer::StringList& vector,
        int firstCharFlag, int secondCharFlag, int thirdCharFlag, int restCharFlag,
        int charInfo[]) {
@@ -423,6 +473,7 @@ ReLexer::ReLexer(ReSource* source, const char* keywords, const char* operators,
        int stringFeatures, int storageFlags) :
                m_source(source),
                m_keywords(),
+               m_keywordNames(),
                m_operators(),
                m_commentStarts(),
                m_commentEnds(),
@@ -443,6 +494,7 @@ ReLexer::ReLexer(ReSource* source, const char* keywords, const char* operators,
                m_input(),
                m_currentCol(0),
                m_hasMoreInput(false),
+               m_storeAll(false),
                m_stringFeatures(stringFeatures),
                m_storageFlags(storageFlags),
                // m_prioOfOp
@@ -455,6 +507,7 @@ ReLexer::ReLexer(ReSource* source, const char* keywords, const char* operators,
        memset(m_assocOfOp, 0, sizeof m_assocOfOp);
 
        memset(m_charInfo, 0, sizeof m_charInfo);
+       m_keywordNames = QByteArray(keywords).split(' ');
        itemsToVector(keywords, m_keywords, CC_FIRST_KEYWORD, CC_2nd_KEYWORD,
                CC_3rd_KEYWORD, CC_REST_KEYWORD, m_charInfo);
        prepareOperators(operators, rightAssociatives);
@@ -541,7 +594,13 @@ void ReLexer::prepareOperators(const char* operators,
                start = end + 1;
        }
 }
-
+/**
+ * Initializes the comments.
+ *
+ * @param comments     a string with comment pairs delimited by ' '.<br>
+ *                                     Note: Use '\n' for the 2nd of a single line comment.<br>
+ *                                     Example: "/+ +/ // \n"
+ */
 void ReLexer::initializeComments(const char* comments) {
        if (comments != NULL) {
                QByteArray starters;
@@ -579,8 +638,8 @@ void ReLexer::initializeComments(const char* comments) {
  * @param tokenLength   the length of the prefix in <code>m_input</code>
  * @param vector        the vector to search. Each element contains the id
  *                      as last entry
- * @param id            the id of the entry in the vector. Only set if found
- * @return
+ * @return                             0: not found<br>
+ *                                             otherwise: the id of the token
  */
 int ReLexer::findInVector(int tokenLength, const StringList& vector) {
        int id = 0;
@@ -654,7 +713,7 @@ bool ReLexer::fillInput() {
  * @return          NULL: not found<br>
  *                  otherwise: the token
  */
-ReToken* ReLexer::findTokenWithId(RplTokenType tokenType, int flag2,
+ReToken* ReLexer::findTokenWithId(ReTokenType tokenType, int flag2,
        StringList& names) {
        int length = 1;
        int inputLength = m_input.size();
@@ -763,6 +822,8 @@ ReToken* ReLexer::scanNumber() {
                        length = realLength;
                }
        }
+       if (m_storeAll)
+               m_currentToken->m_string.append(m_input.constData(), length);
        m_input.remove(0, length);
        m_currentCol += length;
        return m_currentToken;
@@ -889,6 +950,11 @@ void ReLexer::scanComment() {
        m_input.remove(0, length);
        m_currentCol += length;
 }
+
+/**
+ * Sets the flag controlling the storage of the token text.
+ *
+ * If the flag is set scanNumber() and the handling of single character
+ * operators in nextToken() append the scanned input to the string of the
+ * token.
+ *
+ * @param storeAll     <code>true</code>: the scanned text is stored in the token
+ */
+void ReLexer::setStoreAll(bool storeAll)
+{
+       m_storeAll = storeAll;
+}
 #if defined (RPL_LEXER_TRACE)
 bool ReLexer::trace() const
 {
@@ -986,6 +1052,8 @@ ReToken* ReLexer::nextToken() {
                                                                rc = m_currentToken;
                                                                rc->m_tokenType = TOKEN_OPERATOR;
                                                                rc->m_value.m_id = findInVector(1, m_operators);
+                                                               if (m_storeAll)
+                                                                       rc->m_string.append(m_input.constData(), 1);
                                                                m_input.remove(0, 1);
                                                                m_currentCol += 1;
                                                        }
@@ -1006,7 +1074,6 @@ ReToken* ReLexer::nextToken() {
                                                        m_input.remove(0, length);
                                                        m_currentCol += length;
                                                }
-
                                        }
                                }
                        }
@@ -1100,6 +1167,19 @@ size_t ReLexer::maxTokenLength() const {
        return m_maxTokenLength;
 }
 
+/**
+ * Returns the name of a keyword given by its id.
+ *
+ * The ids start with 1: the id n belongs to the entry n-1 of the name list.
+ *
+ * @param keyword      the keyword id
+ * @return                     "": the id is out of range<br>
+ *                                     otherwise: the name of the keyword
+ */
+const QByteArray& ReLexer::nameOfKeyword(int keyword) const
+{
+       if (keyword <= 0 || keyword > m_keywordNames.size())
+               return ReStringUtils::m_empty;
+       return m_keywordNames[keyword - 1];
+}
+
 /**
  * @brief Sets the maximal length of a token
  *
@@ -1115,10 +1195,10 @@ void ReLexer::setMaxTokenLength(size_t maxTokenLength) {
  */
 ReToken* ReLexer::nextNonSpaceToken() {
        ReToken* rc = NULL;
-       RplTokenType type;
+       ReTokenType type;
        do {
                rc = nextToken();
-       } while ((type = m_currentToken->tokenType()) == TOKEN_SPACE
+       } while ((type = rc->tokenType()) == TOKEN_SPACE
                || type == TOKEN_COMMENT_START || type == TOKEN_COMMENT_END
                || type == TOKEN_COMMENT_REST_OF_LINE);
        return rc;
@@ -1137,6 +1217,26 @@ ReToken* ReLexer::nextNonSpaceToken() {
 void ReLexer::startUnit(ReSourceUnitName unit) {
        m_source->startUnit(unit, *m_currentPosition);
 }
+
+/**
+ * Appends the text of a token to a buffer.
+ *
+ * Operators and keywords are converted from their id into their names.
+ * For all other token types the string stored in the token is taken.
+ *
+ * @param token                the token to convert
+ * @param buffer       IN/OUT: the text is appended to this buffer
+ * @return                     <code>buffer</code> (for chaining)
+ */
+QByteArray& ReLexer::textOfToken(ReToken* token, QByteArray& buffer)
+{
+       switch (token->tokenType()) {
+       case TOKEN_OPERATOR:
+               buffer.append(nameOfOp(token->id()));
+               break;
+       case TOKEN_KEYWORD:
+               buffer.append(nameOfKeyword(token->id()));
+               break;
+       default:
+               buffer.append(token->string());
+               break;
+       }
+       return buffer;
+}
 /**
  * @brief Returns the source of the instance.
  *
index e081021173388cb1b0010b285ef41b432cd32b27..fb4c0e476bf9c97120f56950f10ce7679c85e105 100644 (file)
@@ -14,7 +14,7 @@
 
 //#define RPL_LEXER_TRACE
 
-enum RplTokenType {
+enum ReTokenType {
        TOKEN_UNDEF,
        TOKEN_STRING,
        TOKEN_NUMBER,
@@ -39,30 +39,33 @@ class ReLexer;
 
 class ReToken {
 public:
-       ReToken(RplTokenType type);
+       ReToken();
+       ReToken(ReTokenType type);
        ~ReToken();
        ReToken(const ReToken& source);
        ReToken& operator =(const ReToken& source);
 public:
        friend class ReLexer;
-       const QByteArray& toString();
-       bool isInteger();
        int asInteger() const;
-       quint64 asUInt64() const;
        qreal asReal() const;
-       const QByteArray& rawString() const;
-       int id() const;
-       RplTokenType tokenType() const;
-       bool isTokenType(RplTokenType expected) const;
-       bool isOperator(int expected, int alternative = 0) const;
-       bool isKeyword(int expected, int alternative = 0) const;
+       quint64 asUInt64() const;
+       QByteArray asUtf8() const;
        void clear();
-       bool isCapitalizedId() const;
        QByteArray dump() const;
-       static const char* nameOfType(RplTokenType type);
-       QByteArray asUtf8() const;
+       int id() const;
+       bool isCapitalizedId() const;
+       bool isInteger(int* value = NULL);
+       bool isKeyword(int expected, int alternative = 0) const;
+       bool isTokenType(ReTokenType expected) const;
+       bool isOperator(int expected, int alternative = 0) const;
+       const QByteArray& rawString() const;
+       const QByteArray& string() const;
+       ReTokenType tokenType() const;
+       const QByteArray& toString();
+public:
+       static const char* nameOfType(ReTokenType type);
 protected:
-       RplTokenType m_tokenType;
+       ReTokenType m_tokenType;
        QByteArray m_string;
        // only for TOKEN_STRING: copy from source but with escaped chars like "\\n"
        QByteArray m_printableString;
@@ -86,7 +89,7 @@ public:
                NUMTYPE_FLOAT = 1 << 3,
                ///
                NUMTYPE_ALL_INTEGER = NUMTYPE_DECIMAL | NUMTYPE_OCTAL
-                   | NUMTYPE_HEXADECIMAL,
+                       | NUMTYPE_HEXADECIMAL,
                NUMTYPE_ALL = NUMTYPE_ALL_INTEGER | NUMTYPE_FLOAT
        };
        enum CharClassTag {
@@ -143,7 +146,7 @@ public:
                SF_DOUBLE_DELIM = 1 << 6,
                // Redefinitions for better reading:
                SF_LIKE_C = SF_TICK | SF_QUOTE | SF_C_ESCAPING | SF_C_SPECIAL
-                   | SF_C_HEX_CHARS
+                       | SF_C_HEX_CHARS
        };
        enum StorageFlags {
                S_UNDEF,
@@ -161,41 +164,45 @@ public:
 
 public:
        ReLexer(ReSource* source, const char* keywords, const char* operators,
-           const char* rightAssociatives, const char* comments,
-           const char* firstCharsId = "a-zA-Z_", const char* restCharsId =
-               "a-zA-Z0-9_",
-           int numericTypes = NUMTYPE_DECIMAL | NUMTYPE_HEXADECIMAL
-               | NUMTYPE_FLOAT,
-           int stringFeatures = SF_TICK | SF_QUOTE | SF_C_ESCAPING | SF_C_SPECIAL
-               | SF_C_HEX_CHARS, int storageFlags = STORE_NOTHING);
+               const char* rightAssociatives, const char* comments,
+               const char* firstCharsId = "a-zA-Z_", const char* restCharsId =
+                       "a-zA-Z0-9_",
+               int numericTypes = NUMTYPE_DECIMAL | NUMTYPE_HEXADECIMAL
+                       | NUMTYPE_FLOAT,
+               int stringFeatures = SF_TICK | SF_QUOTE | SF_C_ESCAPING | SF_C_SPECIAL
+                       | SF_C_HEX_CHARS, int storageFlags = STORE_NOTHING);
        virtual ~ReLexer();
 public:
-       ReToken* nextToken();
-       void undoLastToken();
-       void undoLastToken2();
-       void saveLastToken();
-       ReToken* peekNonSpaceToken();
-       ReToken* nextNonSpaceToken();
+       const ReSourcePosition* currentPosition() const;
+       ReToken* currentToken() const;
+       bool isRightAssociative(int op) const;
        size_t maxTokenLength() const;
+       const QByteArray& nameOfKeyword(int keyword) const;
+       const QByteArray& nameOfOp(int op) const;
+       virtual ReToken* nextToken();
+       ReToken* nextNonSpaceToken();
+       ReToken* peekNonSpaceToken();
+       int prioOfOp(int op) const;
+       void saveLastToken();
        void setMaxTokenLength(size_t maxTokenLength);
-       void startUnit(ReSourceUnitName unit);
+       void setStoreAll(bool storeAll);
        ReSource* source();
-       int prioOfOp(int op) const;
-       const QByteArray& nameOfOp(int op) const;
-       bool isRightAssociative(int op) const;
-       const ReSourcePosition* currentPosition() const;
-       ReToken* currentToken() const;
+       void startUnit(ReSourceUnitName unit);
+       QByteArray& textOfToken(ReToken* token, QByteArray& buffer);
+       void undoLastToken();
+       void undoLastToken2();
 #if defined RPL_LEXER_TRACE
        bool trace() const;
        void setTrace(bool trace);
 #endif
+
 private:
-       void prepareOperators(const char* operators, const char* rightAssociatives);
-       void initializeComments(const char* comments);
        bool fillInput();
        int findInVector(int tokenLength, const StringList& vector);
-       ReToken* findTokenWithId(RplTokenType tokenType, int flag2,
-           StringList& names);
+       ReToken* findTokenWithId(ReTokenType tokenType, int flag2,
+               StringList& names);
+       void initializeComments(const char* comments);
+       void prepareOperators(const char* operators, const char* rightAssociatives);
        ReToken* scanNumber();
        ReToken* scanString();
        void scanComment();
@@ -203,6 +210,8 @@ protected:
        ReSource* m_source;
        /// sorted, string ends with the id of the keyword
        StringList m_keywords;
+       // names of the keywords: the index is (keyword id - 1), see nameOfKeyword():
+       StringList m_keywordNames;
        // sorted, string ends with the id of the operator
        StringList m_operators;
        // sorted, each entry ends with the id of the comment start
@@ -230,6 +239,7 @@ protected:
        QByteArray m_input;
        int m_currentCol;
        bool m_hasMoreInput;
+       bool m_storeAll;
        int m_stringFeatures;
        int m_storageFlags;
        /// priority of the operators: index: id of the operator. content: prio
index a8f1ddbf207a819b2d9ca0c751ae683eacbaa1fd..437747b5c8916711ec37134382316e8d84a1df69 100644 (file)
@@ -39,7 +39,7 @@
  * @return
  */
 ReSyntaxError::ReSyntaxError(const char* reason) :
-           m_reason(reason) {
+               m_reason(reason) {
 }
 /**
  * @brief Returns the description of the exception.
@@ -63,8 +63,8 @@ const char* ReSyntaxError::reason() const {
  * @brief Constructor.
  * @param reason        the reason of the exception
  */
-RplParserStop::RplParserStop(const char* reason) :
-           ReSyntaxError(reason) {
+ReParserStop::ReParserStop(const char* reason) :
+               ReSyntaxError(reason) {
 }
 
 /** @class ReParser ReParser.hpp "expr/ReParser.hpp"
@@ -77,16 +77,14 @@ RplParserStop::RplParserStop(const char* reason) :
  * @brief Constructor.
  *
  * @param lexer     the tokenizer
- * @param tree      the abstract syntax tree
- */
-ReParser::ReParser(ReLexer& lexer, ReASTree& tree) :
-           m_lexer(lexer),
-           m_tree(tree),
-           m_messages(),
-           m_errors(0),
-           m_warnings(0),
-           m_maxErrors(20),
-           m_maxWarnings(20) {
+ */
+ReParser::ReParser(ReLexer& lexer) :
+               m_lexer(lexer),
+               // m_tree is a pointer and no longer a constructor argument:
+               // without this entry its value would be indeterminate
+               m_tree(NULL),
+               m_messages(),
+               m_errors(0),
+               m_warnings(0),
+               m_maxErrors(20),
+               m_maxWarnings(20) {
 }
 
 /**
@@ -99,11 +97,11 @@ ReParser::ReParser(ReLexer& lexer, ReASTree& tree) :
  * @return          false (for chaining)
  */
 bool ReParser::addSimpleMessage(LevelTag prefix, int location,
-    const ReSourcePosition* position, const char* message) {
+       const ReSourcePosition* position, const char* message) {
        char buffer[2048];
        QByteArray msg;
        qsnprintf(buffer, sizeof buffer, "%c%04d %s:%d-%d: ", prefix, location,
-           position->sourceUnit()->name(), position->lineNo(), position->column());
+               position->sourceUnit()->name(), position->lineNo(), position->column());
        int used = strlen(buffer);
        int length = strlen(message);
        if (length >= (int) sizeof buffer - used)
@@ -125,7 +123,7 @@ bool ReParser::addSimpleMessage(LevelTag prefix, int location,
  * @return          false (for chaining)
  */
 bool ReParser::addMessage(LevelTag prefix, int location,
-    const ReSourcePosition* position, const char* format, va_list varList) {
+       const ReSourcePosition* position, const char* format, va_list varList) {
        char buffer[2048];
        qvsnprintf(buffer, sizeof buffer, format, varList);
        return addSimpleMessage(prefix, location, position, buffer);
@@ -161,12 +159,12 @@ void ReParser::syntaxError(int location, const char* message) {
  */
 
 void ReParser::syntaxError(int location, const char* message,
-    const char* symbol, const ReSourcePosition* position) {
+       const char* symbol, const ReSourcePosition* position) {
        char buffer[256];
        char buffer2[512];
        qsnprintf(buffer2, sizeof buffer2,
-           "The starting symbol %s is located here. Missing point: %s", symbol,
-           m_lexer.currentPosition()->utf8(buffer, sizeof buffer));
+               "The starting symbol %s is located here. Missing point: %s", symbol,
+               m_lexer.currentPosition()->utf8(buffer, sizeof buffer));
 
        addSimpleMessage(LT_ERROR, location, m_lexer.currentPosition(), message);
        addSimpleMessage(LT_INFO, location + 1, position, buffer2);
@@ -189,7 +187,7 @@ bool ReParser::error(int location, const char* format, ...) {
        addMessage(LT_ERROR, location, m_lexer.currentPosition(), format, ap);
        va_end(ap);
        if (++m_errors >= m_maxErrors)
-               throw RplParserStop("too many errors");
+               throw ReParserStop("too many errors");
        return false;
 }
 /**
@@ -205,11 +203,11 @@ bool ReParser::error(int location, const char* format, ...) {
  * @return          false (for chaining)
  */
 bool ReParser::error(int location, const ReSourcePosition* position,
-    const char* message, const char* message2) {
+       const char* message, const char* message2) {
        addSimpleMessage(LT_ERROR, location, m_lexer.currentPosition(), message);
        addSimpleMessage(LT_INFO, location + 1, position, message2);
        if (++m_errors >= m_maxErrors)
-               throw RplParserStop("too many errors");
+               throw ReParserStop("too many errors");
        return false;
 }
 
@@ -228,7 +226,7 @@ void ReParser::warning(int location, const char* format, ...) {
        addMessage(LT_WARNING, location, m_lexer.currentPosition(), format, ap);
        va_end(ap);
        if (++m_warnings >= m_maxWarnings)
-               throw RplParserStop("too many warnings");
+               throw ReParserStop("too many warnings");
 }
 /**
  * @brief Return the number of errors.
index ebdd4066b4381508a9327ca02f449cbd43410630..d265034fa8d211dd248e3300b5a4635e09d6bc2d 100644 (file)
@@ -21,9 +21,9 @@ private:
        const char* m_reason;
 };
 
-class RplParserStop: public ReSyntaxError {
+class ReParserStop: public ReSyntaxError {
 public:
-       RplParserStop(const char* reason);
+       ReParserStop(const char* reason);
 };
 
 class ReParser {
@@ -37,24 +37,24 @@ public:
 public:
        typedef QList<QByteArray> MessageList;
 public:
-       ReParser(ReLexer& lexer, ReASTree& ast);
+       ReParser(ReLexer& lexer);
 public:
        bool addSimpleMessage(LevelTag prefix, int location,
-           const ReSourcePosition* pos, const char* message);
+               const ReSourcePosition* pos, const char* message);
        bool addMessage(LevelTag prefix, int location, const ReSourcePosition* pos,
-           const char* format, va_list varList);
+               const char* format, va_list varList);
        void syntaxError(int location, const char* message);
        void syntaxError(int location, const char* message, const char* symbol,
-           const ReSourcePosition* position);
+               const ReSourcePosition* position);
        bool error(int location, const char* format, ...);
        bool error(int location, const ReSourcePosition* position,
-           const char* message, const char* message2);
+               const char* message, const char* message2);
        void warning(int location, const char* format, ...);
        int errors() const;
        int warnings() const;
 protected:
        ReLexer& m_lexer;
-       ReASTree& m_tree;
+       ReASTree* m_tree;
        MessageList m_messages;
        int m_errors;
        int m_warnings;
index 37ad07592861f0f023c3c2c812104f6a051cd09b..1633b4bf8233e7259618aa729cbf05a70f7f3d53 100644 (file)
@@ -35,6 +35,10 @@ enum {
        LOC_FILESYSTEM,
        LOC_RANDOMIZER,
        LOC_CRYPTFILESYSTEM,
+
+       // Applications:
+       LOC_RECFORM_CPPPARSER = 201,
+       LOC_RECFORM_CPPFORMATTER = 202,
 };
 #define LOC_FIRST_OF(moduleNo) (moduleNo*100+1)
 class RplModules {