From: hama Date: Sun, 27 Dec 2015 22:53:32 +0000 (+0100) Subject: recform: initial state X-Git-Url: https://gitweb.hamatoma.de/?a=commitdiff_plain;h=44b8da4bb0d51f56deb45a2b076447651f5901c0;p=reqt recform: initial state --- diff --git a/appl/recform/CFormatter.cpp b/appl/recform/CFormatter.cpp index be92045..60e1e87 100644 --- a/appl/recform/CFormatter.cpp +++ b/appl/recform/CFormatter.cpp @@ -1,21 +1,85 @@ /* - * Licence: + * CFormatter.cpp + * + * (Un)License: Public Domain * You can use and modify this file without any restriction. - * There is no warranty. - * You also can use the licence from http://www.wtfpl.net/. - * The original sources can be found on https://github.com/republib. -*/ + * Do what you want. + * No warranties and disclaimer of any damages. + * More info: http://unlicense.org + * The latest sources: https://github.com/republib + */ -#include "CFormatter.hpp" +#include "recformmain.hpp" + +enum { + LOC_RESTORE_PARENTH_STACK_1 = LOC_FIRST_OF(LOC_RECFORM_CPPFORMATTER), // 20201 +}; /** * Constructor. * - * @param args the program arguments + * @param args the program arguments + * @param logger the logger + */ +CFormatter::CFormatter(ReProgramArgs& args, ReLogger* logger) : + m_args(args), + m_blockLevel(0), + m_parenthLevel(0), + m_parenthLevelStack(), + m_logicalLine(), + m_continued(false), + m_lastDeclToken(-1), + m_writer(NULL), + m_logger(logger), + m_tabSize(4), + m_useTab(true), + m_lexer(NULL), + m_parser(NULL) +{ +} + +/** + * @file + * + * The formatter collects logical lines: this is a collection of symbols + * which should be written in one line. + * If the line length is to large, this logical line will be divided into + * more real lines with a higher indention. + */ + +/** + * Adds a token to the current logical line. + * @param token + */ +void CFormatter::addToken(ReToken* token) +{ + m_logicalLine.push_back(FormatToken(token, m_parenthLevel)); +} + +/** + * Write the current locical line to the output media. 
+ * + * @param isPart true: the logical line may be continued */ -CFormatter::CFormatter(ReProgramArgs& args) : - m_args(args) +void CFormatter::flush(bool isPart) { + if (m_logicalLine.size() > 0){ + QByteArray buffer; + buffer.reserve(8096); + indent(m_blockLevel + m_continued ? 1 : 0, buffer); + FormatToken* lastItem = &m_logicalLine.data()[0]; + buffer.append(lastItem->toString()); + for (int ix = 1; ix < m_logicalLine.size(); ix++){ + FormatToken* item = &m_logicalLine.data()[ix]; + if (needsBlank(lastItem, item, ix <= m_lastDeclToken)) + buffer.append(' '); + m_lexer->textOfToken(item, buffer); + lastItem = item; + } + m_writer->writeLine(buffer); + m_logicalLine.clear(); + } + m_continued = isPart; } /** @@ -26,21 +90,7 @@ CFormatter::CFormatter(ReProgramArgs& args) : void CFormatter::format(const char* filename) { ReSimpleSource source(filename); - const char* keywords = ""; - const char* operators = ""; - const char* rightAssociatives = ""; - const char* comments = ""; - const char* firstCharsId = "a-zA-Z_"; - const char* restCharsId = "a-zA-Z0-9_"; - int numericTypes = ReLexer::NUMTYPE_DECIMAL | ReLexer::NUMTYPE_HEXADECIMAL - | ReLexer::NUMTYPE_FLOAT; - int stringFeatures = ReLexer::SF_TICK | ReLexer::SF_QUOTE - | ReLexer::SF_C_ESCAPING | ReLexer::SF_C_SPECIAL | ReLexer::SF_C_HEX_CHARS; - int storageFlags = ReLexer::STORE_COMMENT | ReLexer::STORE_ORG_STRING; - ReLexer lexer(&source, keywords, operators, rightAssociatives, comments, - firstCharsId, restCharsId, numericTypes, stringFeatures, - storageFlags); - + CppLexer lexer(&source); } /** @@ -52,3 +102,324 @@ void CFormatter::formatAll() format(m_args.arg(ix)); } } + +/** + * Writes the indention characters (tabs or blanks) into a buffer. + * + * @param level the level to indent + * @param buffer OUT: the buffer to append + */ +void CFormatter::indent(int level, QByteArray& buffer){ + int count = m_blockLevel; + if (m_continued) + count++; + int cols = count * m_tabSize; + if (! 
m_useTab) + count *= m_tabSize; + while(--count > 0) + buffer.append(m_useTab ? '\t' : ' '); +} + +/** + * Handles a label. + * + * @precond the label name is the only element in the logical line and + * the current token is the ':' + */ +void CFormatter::label() +{ + +} + +/** + * Tests whether a blank is needed between two tokens. + * + * @param first the first token + * @param second the 2nd token + * @param isDeclaration true: tokens inside the type area of a + * declaration. Operators '&', '*'... are treated differently + * @return true: a blank is needed. + */ +bool CFormatter::needsBlank(ReToken* first, ReToken* second, bool isDeclaration){ + ReTokenType type1 = first->tokenType(); + ReTokenType type2 = second->tokenType(); + CppOperator op; + bool rc = false; + switch(type1){ + case TOKEN_STRING: + case TOKEN_NUMBER: + case TOKEN_REAL: + switch(type2){ + case TOKEN_STRING: + case TOKEN_NUMBER: + case TOKEN_REAL: + case TOKEN_KEYWORD: + case TOKEN_ID: + case TOKEN_COMMENT_START: + rc = true; + break; + case TOKEN_OPERATOR: + rc = needsPrecedingBlank((CppOperator) second->id()); + break; + default: + rc = true; + } + break; + case TOKEN_KEYWORD: + case TOKEN_ID: + switch(type2){ + case TOKEN_STRING: + case TOKEN_NUMBER: + case TOKEN_REAL: + case TOKEN_KEYWORD: + case TOKEN_ID: + rc = true; + break; + case TOKEN_OPERATOR: + if (! isDeclaration) + rc = needsPrecedingBlank((CppOperator) second->id()); + else { + rc = (op = (CppOperator) second->id()) != OP_STAR + && op != OP_BIT_AND && op != OP_GT && op != OP_LT; + } + break; + default: + rc = true; + break; + } + break; + case TOKEN_OPERATOR: + switch(type2){ + case TOKEN_STRING: + case TOKEN_NUMBER: + case TOKEN_REAL: + case TOKEN_KEYWORD: + case TOKEN_ID: + rc = needsTrailingBlank((CppOperator) first->id()); + break; + case TOKEN_OPERATOR: + if (! 
isDeclaration) + rc = needsTrailingBlank((CppOperator) first->id()) + || needsPrecedingBlank((CppOperator) second->id()); + else { + rc = (op = (CppOperator) second->id()) != OP_GT && op != OP_LT; + } + break; + default: + rc = true; + break; + } + break; + case TOKEN_COMMENT_START: + rc = true; + break; + default: + break; + } + return rc; +} + +/** + * Tests whether an operator needs a preceding blank. + * + * @param op operator to test + * @return true: a blank is needed. + */ + +bool CFormatter::needsPrecedingBlank(CppOperator op) +{ + bool rc = true; + switch(op){ + case OP_QUESTIONMARK: + case OP_COLON: + case OP_ASSIGN: + case OP_PLUS_ASSIGN: + case OP_MINUS_ASSIGN: + case OP_DIV_ASSIGN: + case OP_MOD_ASSIGN: + case OP_LSHIFT_ASSIGN: + case OP_RSHIFT_ASSIGN: + case OP_AND_ASSIGN: + case OP_XOR_ASSIGN: + case OP_OR_ASSIGN: + case OP_OR: + case OP_AND: + case OP_BIT_OR: + case OP_BIT_XOR: + case OP_BIT_AND: + case OP_LSHIFT: + case OP_RSHIFT: + case OP_LT: + case OP_LE: + case OP_GT: + case OP_GE: + case OP_EQ: + case OP_NE: + case OP_PLUS: + case OP_MINUS: + case OP_STAR: + case OP_DIV: + case OP_MOD: + case OP_NOT: + rc = true; + default: + break; + } + return rc; +} + +/** + * Tests whether an operator needs a trailing blank. + * + * @param op operator to test + * @return true: a blank is needed. 
+ */ + +bool CFormatter::needsTrailingBlank(CppOperator op) +{ + bool rc = true; + switch(op){ + case OP_COMMA: + case OP_QUESTIONMARK: + case OP_COLON: + case OP_ASSIGN: + case OP_PLUS_ASSIGN: + case OP_MINUS_ASSIGN: + case OP_DIV_ASSIGN: + case OP_MOD_ASSIGN: + case OP_LSHIFT_ASSIGN: + case OP_RSHIFT_ASSIGN: + case OP_AND_ASSIGN: + case OP_XOR_ASSIGN: + case OP_OR_ASSIGN: + case OP_OR: + case OP_AND: + case OP_BIT_OR: + case OP_BIT_XOR: + case OP_BIT_AND: + case OP_LSHIFT: + case OP_RSHIFT: + case OP_LT: + case OP_LE: + case OP_GT: + case OP_GE: + case OP_EQ: + case OP_NE: + case OP_PLUS: + case OP_MINUS: + case OP_STAR: + case OP_DIV: + case OP_MOD: + case OP_NOT: + default: + break; + } + return rc; +} + +/** + * Returns the parser. + * + * @return the parser + */ +CppParser* CFormatter::parser() const +{ + return m_parser; +} + +/** + * Store the current parenthesis level and set it to 0. + */ +void CFormatter::restoreParenthLevel(){ + if (m_parenthLevelStack.size() <= 0) + m_logger->log(LOG_ERROR, LOC_RESTORE_PARENTH_STACK_1, "stack underflow"); + else { + m_parenthLevel = m_parenthLevelStack.takeFirst(); + } +} + +/** + * Sets the block level. + * + * @param parenthLevel 0: set it to 0
+ * otherwise: increment it by this level + */ +void CFormatter::setBlockLevel(int blockLevel) +{ + if (blockLevel == 0) + m_blockLevel = 0; + else + m_blockLevel += blockLevel; +} + +/** + * Sets the last index of declaration tokens. + * + * @param lastDeclToken the index to set + */ +void CFormatter::setLastDeclToken(int lastDeclToken) +{ + m_lastDeclToken = lastDeclToken; +} + +/** + * Sets the lexer. + * + * @param lexer the lexer to set + */ +void CFormatter::setLexer(CppLexer* lexer) +{ + m_lexer = lexer; +} + +/** + * Sets the parenthesis level. + * + * @param parenthLevel 0: set it to 0
+ * otherwise: increment it by this level + */ +void CFormatter::setParenthLevel(int parenthLevel) +{ + if (parenthLevel == 0) + m_parenthLevel = 0; + else + m_parenthLevel += parenthLevel; +} + +/** + * Sets the parser. + * + * @param parser the parser to set + */ +void CFormatter::setParser(CppParser* parser) +{ + m_parser = parser; +} + +/** + * Store the current parenthesis level and set it to 0. + */ +void CFormatter::saveAndResetParenthLevel(){ + m_parenthLevelStack.push_back(m_parenthLevel); + m_parenthLevel = 0; +} + +/** + * Sets the writer. + * + * @param writer the writer to set + */ +void CFormatter::setWriter(ReWriter* writer) +{ + m_writer = writer; +} + +/** + * Returns the current writer. + * + * @return the current writer + */ +ReWriter* CFormatter::writer() const +{ + return m_writer; +} diff --git a/appl/recform/CFormatter.hpp b/appl/recform/CFormatter.hpp index 16ad68c..de993b6 100644 --- a/appl/recform/CFormatter.hpp +++ b/appl/recform/CFormatter.hpp @@ -1,10 +1,13 @@ /* - * Licence: + * CFormatter.hpp + * + * (Un)License: Public Domain * You can use and modify this file without any restriction. - * There is no warranty. - * You also can use the licence from http://www.wtfpl.net/. - * The original sources can be found on https://github.com/republib. -*/ + * Do what you want. + * No warranties and disclaimer of any damages. + * More info: http://unlicense.org + * The latest sources: https://github.com/republib + */ #ifndef CFORMATTER_HPP #define CFORMATTER_HPP @@ -15,15 +18,74 @@ #include "expr/reexpr.hpp" #endif +/** + * The smallest (not separable) unit of an output line. + */ +class FormatToken : public ReToken { +public: + /** Constructor. + */ + FormatToken() : + ReToken(TOKEN_UNDEF), + m_level(0){ + } + + /** Constructor. 
+ * @param token the token from the lexer + * @param level the parenthesis level + */ + FormatToken(ReToken* token, int level) : + ReToken(*token), + m_level(level){ + } +private: + int m_level; +}; +class CppParser; +class CppLexer; + class CFormatter { public: - CFormatter(ReProgramArgs& args); + CFormatter(ReProgramArgs& args, ReLogger* logger); public: + void addToken(ReToken* token); + void flush(bool isPart); void format(const char* filename); void formatAll(); + void label(); + CppParser* parser() const; + void restoreParenthLevel(); + void saveAndResetParenthLevel(); + void setBlockLevel(int blockLevel); + void setLastDeclToken(int lastDeclToken); + void setLexer(CppLexer* lexer); + void setParenthLevel(int parenthLevel); + void setParser(CppParser* parser); + void setWriter(ReWriter* writer); + ReWriter* writer() const; + +protected: + void indent(int level, QByteArray& buffer); + bool needsPrecedingBlank(CppOperator op); + bool needsTrailingBlank(CppOperator op); + bool needsBlank(ReToken* first, ReToken* second, bool isDeclaration); private: ReProgramArgs& m_args; + int m_blockLevel; + int m_parenthLevel; + QStack m_parenthLevelStack; + QVector m_logicalLine; + /// true: a part of the logical line is already written (and removed) + bool m_continued; + /// index of the token which ends the declaration: + int m_lastDeclToken; + ReWriter* m_writer; + ReLogger* m_logger; + int m_tabSize; + bool m_useTab; + CppLexer* m_lexer; + CppParser* m_parser; }; #endif // CFORMATTER_HPP diff --git a/appl/recform/CppDefinitions.hpp b/appl/recform/CppDefinitions.hpp new file mode 100644 index 0000000..78ac546 --- /dev/null +++ b/appl/recform/CppDefinitions.hpp @@ -0,0 +1,143 @@ +/* + * CppDefinitions.hpp + * + * (Un)License: Public Domain + * You can use and modify this file without any restriction. + * Do what you want. + * No warranties and disclaimer of any damages. 
/*
 * More info: http://unlicense.org
 * The latest sources: https://github.com/republib
 */

#ifndef CPPDEFINITIONS_HPP
#define CPPDEFINITIONS_HPP

/**
 * The ids of the C++ keywords, in alphabetical order.
 *
 * NOTE(review): the order presumably has to match the keyword list handed
 * over to the lexer (CppParser::m_keywords) -- confirm before reordering.
 */
enum CppKeyword {
    K_UNDEF,
    K_ALIGNAS, K_ALIGNOF, K_ASM, K_AUTO,
    K_BOOL, K_BREAK,
    K_CASE, K_CATCH, K_CHAR, K_CHAR16_T, K_CHAR32_T, K_CLASS, K_CONST,
    K_CONSTEXPR, K_CONST_CAST, K_CONTINUE,
    K_DECLTYPE, K_DEFAULT, K_DELETE, K_DO, K_DOUBLE, K_DYNAMIC_CAST,
    K_ELSE, K_ENUM, K_EXPLICIT, K_EXPORT, K_EXTERN,
    K_FALSE, K_FLOAT, K_FOR, K_FRIEND,
    K_GOTO,
    K_IF, K_INLINE, K_INT,
    K_LONG,
    K_MUTABLE,
    K_NAMESPACE, K_NEW, K_NOEXCEPT, K_NULLPTR,
    K_OPERATOR,
    K_PRIVATE, K_PROTECTED, K_PUBLIC,
    K_REGISTER, K_REINTERPRET_CAST, K_RETURN,
    K_SHORT, K_SIGNED, K_SIZEOF, K_STATIC, K_STATIC_ASSERT, K_STATIC_CAST,
    K_STRUCT, K_SWITCH,
    K_TEMPLATE, K_THIS, K_THREAD_LOCAL, K_THROW, K_TRUE, K_TRY, K_TYPEDEF,
    K_TYPEID, K_TYPENAME,
    K_UNION, K_UNSIGNED, K_USING,
    K_VIRTUAL, K_VOID, K_VOLATILE,
    K_WCHAR_T, K_WHILE
};

/**
 * The ids of the C++ operators.
 *
 * The assignment operators form the contiguous range
 * OP_ASSIGN..OP_OR_ASSIGN. OP_COUNT is the number of entries.
 *
 * NOTE(review): there is no entry for "*=" -- confirm whether this is intended.
 */
enum CppOperator {
    OP_UNDEF,
    OP_COMMA,
    // conditional and assignment operators
    OP_QUESTIONMARK, OP_COLON,
    OP_ASSIGN, OP_PLUS_ASSIGN, OP_MINUS_ASSIGN, OP_DIV_ASSIGN, OP_MOD_ASSIGN,
    OP_LSHIFT_ASSIGN, OP_RSHIFT_ASSIGN, OP_AND_ASSIGN, OP_XOR_ASSIGN,
    OP_OR_ASSIGN,
    // logical and bitwise operators
    OP_OR,
    OP_AND,
    OP_BIT_OR,
    OP_BIT_XOR,
    OP_BIT_AND,
    OP_LSHIFT, OP_RSHIFT,
    // comparisons
    OP_LT, OP_LE, OP_GT, OP_GE,
    OP_EQ, OP_NE,
    // arithmetic
    OP_PLUS, OP_MINUS,
    OP_STAR, OP_DIV, OP_MOD,
    // member pointers, unary operators
    OP_DOT_STAR, OP_ARROW_STAR,
    OP_PLUS_PLUS, OP_MINUS_MINUS, OP_NOT, OP_BIT_NOT,
    // brackets, member access, scope
    OP_LPARENTH, OP_RPARENTH, OP_LBRACKET, OP_RBRACKET, OP_DOT, OP_ARROW,
    OP_BELONGS,
    // block delimiters and statement end
    OP_LBRACE, OP_RBRACE, OP_SEMICOLON,
    OP_COUNT
};

#endif // CPPDEFINITIONS_HPP

// ---- appl/recform/CppParser.cpp (new file), its license header follows ----
CppParser.cpp + * + * (Un)License: Public Domain + * You can use and modify this file without any restriction. + * Do what you want. + * No warranties and disclaimer of any damages. + * More info: http://unlicense.org + * The latest sources: https://github.com/republib + */ + +#include "recformmain.hpp" + +enum { + LOC_FORMATASTREE_1 = LOC_FIRST_OF(LOC_RECFORM_CPPPARSER), // 20101 + LOC_BODY_1, // 20102 + LOC_BODY_2, // 20103 + LOC_STARTS_WITH_ID_1, // 20104 + LOC_BODY_3, // 20104 +}; + +const char* CppParser::m_keywords = "alignas alignof asm auto bool break case catch " + "char char16_t char32_t class const constexpr const_cast continue " + "decltype default delete do double dynamic_cast else enum explicit " + "export extern false float for friend goto if inline int long mutable " + "namespace new noexcept nullptr operator private protected public " + "register reinterpret_cast return short signed sizeof static " + "static_assert static_cast struct switch template this thread_local " + "throw true try typedef typeid typename union unsigned using virtual " + "void volatile wchar_t while"; +const char* CppParser::m_operators = ",\n? : = += -= /= %= <<= >>= &= ^= |=\n" + "||\n&&\n|\n^\n&\n<< >>\n< <= > >=\n== !=\n+ -\n* / %\n.* ->*\n" + "++ -- ! ~\n( ) [ ] . ->\n::\n{ } ;"; +const char* CppParser::m_rightAssociatives = "? : = += -= /= %= <<= >>= &= ^= |="; +const char* CppParser::m_comments = "/* */ // \n"; +const char* CppParser::m_firstCharsId = "a-zA-Z_"; +const char* CppParser::m_restCharsId = "a-zA-Z0-9_"; + +/** + * Constructor. + * + * @param lexer the tokenizer delivering the syntax elements + * @param formatter the formatter info + */ +CppParser::CppParser(CppLexer& lexer, CFormatter& formatter) : + ReParser(lexer), + m_formatter(formatter), + m_newlinePreceds(false) +{ + m_formatter.setLexer(&lexer); + m_formatter.setParser(this); +} + +/** + * Handles a sequence of blanks. + * + * Find out whether a newline is in the blank sequence. 
+ * + * @param token the token with the first blank + */ +void CppParser::blank(ReToken* token) +{ + m_newlinePreceds = token->toString().indexOf('\n') >= 0; + while( (token = m_lexer.nextToken()) != NULL && token->tokenType() == TOKEN_SPACE) + if (token->toString().indexOf('\n') >= 0) + m_newlinePreceds = true; + if (token->tokenType() != TOKEN_END_OF_SOURCE) + m_lexer.undoLastToken(); +} + +/** + * Parses the current file. + */ +void CppParser::parse() +{ + body(BT_PROG); +} + +/** + * Handle a declaration or an expression or a label. + * + * @param token the first token of the logical line + */ +void CppParser::nonStatement(ReToken* token) +{ + m_formatter.addToken(token); + int count = 0; + bool again = true; + ReTokenType tokenType; + bool isDecl = true; + m_formatter.saveAndResetParenthLevel(); + while(again) { + count++; + token = m_lexer.nextToken(); + tokenType = token->tokenType(); + if (count == 2 && token->isOperator(OP_COLON)) + m_formatter.label(); + else { + m_formatter.addToken(token); + + if (tokenType == TOKEN_OPERATOR) { + if (isAssignment((CppOperator) token->id())) + isDecl = false; + else if (token->isOperator(OP_LPARENTH, OP_LBRACKET)) + m_formatter.setParenthLevel(+1); + else if (token->isOperator(OP_RPARENTH, OP_RBRACKET)) + m_formatter.setParenthLevel(-1); + } + + } + + } + m_formatter.restoreParenthLevel(); +} + +/** + * Handles a statement. + * + * @param token the first token of the statement + */ +void CppParser::statement(ReToken* token) +{ +} +/** + * Handles a class/struct/union header. + * + * @param type TOKEN_CLASS, TOKEN_STRUCT or TOKEN_UNION + */ +void CppParser::classHeader(ReTokenType type) +{ + +} + +/** + * Handles a declaration. + * + * @param keyword the intro of the declaration + */ +void CppParser::declaration(CppKeyword keyword) +{ + +} + +/** + * Handles a expression statement. + * + * This is "one logical line" statement, e.g. an assignment. 
+ * + * @param token the first token + */ +void CppParser::exprStatement(ReToken* token) +{ + do { + m_formatter.addToken(token); + } while (token->isOperator(OP_SEMICOLON)); + m_formatter.flush(false); +} + + +/** + * Returns whether a keyword is a the start of a declaration. + * + * @param keyword the keyword to inspect + * @return true: the token is a start of a declaration + */ +bool CppParser::isDeclarationIntro(CppKeyword keyword){ + bool rc = false; + switch(keyword){ + case K_STATIC: + case K_AUTO: + case K_BOOL: + case K_INT: + case K_CHAR: + case K_CHAR16_T: + case K_CHAR32_T: + case K_CONST: + case K_FLOAT: + case K_DOUBLE: + case K_LONG: + case K_REGISTER: + case K_SIGNED: + case K_EXPORT: + case K_EXTERN: + case K_UNSIGNED: + case K_VOLATILE: + case K_VIRTUAL: + case K_VOID: + rc = true; + break; + default: + break; + } + return rc; +} +/** + * Tests whether a operator is an assignment operator. + * + * @param op test to inspect + * @return true: op is an assignment operator + */ +bool CppParser::isAssignment(CppOperator op){ + bool rc = op >= OP_ASSIGN && op <= OP_OR_ASSIGN; + return rc; +} + +/** + * Returns whether a keyword is a the start of a declaration. + * + * @param keyword keyword to inspect + * @return true: the token is a start of a declaration + */ +bool CppParser::isStatement(CppKeyword keyword){ + bool rc = false; + switch(keyword){ + case K_DO: + case K_FOR: + case K_GOTO: + case K_IF: + case K_SWITCH: + case K_TRY: + case K_THROW: + case K_WHILE: + case K_RETURN: + case K_USING: + rc = true; + break; + default: + break; + } + return rc; +} + +/** + * Parses the current file. 
+ */ +void CppParser::body(BodyType bodyType){ + ReToken* token; + ReTokenType type; + CppKeyword keyword; + while ( (token = m_lexer.nextToken()) != NULL + && (type = token->tokenType()) != TOKEN_END_OF_SOURCE){ + switch(token->tokenType()){ + case TOKEN_STRING: + case TOKEN_NUMBER: + case TOKEN_REAL: + if (bodyType == BT_CLASS || bodyType == BT_PROG) + syntaxError(LOC_BODY_1, "unexpected token"); + exprStatement(token); + break; + case TOKEN_OPERATOR: + if (token->isOperator(OP_RBRACE)) + break; + else + syntaxError(LOC_BODY_3, "unexpected token"); + break; + case TOKEN_ID: + nonStatement(token); + break; + case TOKEN_KEYWORD: + keyword = (CppKeyword) token->id(); + if (isStatement(keyword)) + statement(token); + else + nonStatement(token); + break; + case TOKEN_COMMENT_REST_OF_LINE: + case TOKEN_COMMENT_START: + case TOKEN_COMMENT_END: + comment(token); + break; + case TOKEN_SPACE: + blank(token); + break; + case TOKEN_END_OF_SOURCE: + break; + case TOKEN_UNDEF: + default: + syntaxError(LOC_BODY_2, "unexpected token"); + break; + } + } +} + +/** + * Handles a comment. + * + * Reads the comment and write it formatted. + * + * @param token the comment start + */ +void CppParser::comment(ReToken* token) +{ + +} + +/** + * Constructor. + * + * @param source the input source + */ +CppLexer::CppLexer(ReSource* source) : + ReLexer(source, CppParser::m_keywords, CppParser::m_operators, + CppParser::m_rightAssociatives, CppParser::m_comments, + CppParser::m_firstCharsId, CppParser::m_restCharsId, + ReLexer::NUMTYPE_DECIMAL | ReLexer::NUMTYPE_HEXADECIMAL + | ReLexer::NUMTYPE_FLOAT, + ReLexer::SF_TICK | ReLexer::SF_QUOTE | ReLexer::SF_C_ESCAPING + | ReLexer::SF_C_SPECIAL | ReLexer::SF_C_HEX_CHARS, + ReLexer::STORE_ALL), + m_preview(false), + m_queue(), + m_indexQueue(0) +{ + setStoreAll(true); +} + +/** + * Returns the next token. 
+ * + * @return the next token + */ +ReToken*CppLexer::nextToken() +{ + ReToken* token; + if (m_preview || m_queue.size() == 0){ + token = ReLexer::nextToken(); + if (m_preview) + m_queue.append(*token); + } else { + if (++m_indexQueue < m_queue.size()){ + token = &m_queue.data()[m_indexQueue]; + } else { + m_queue.clear(); + token = ReLexer::nextToken(); + } + } + return token; +} + +/** + * Sets the preview mode. + * + * In the "preview mode" the tokens will be stored in a list. + * After reset the same tokens will be returned again with nextToken(). + * + * @param preview true: the tokens will be stored additionally + * false: the next token will be the first of the list + */ +void CppLexer::setPreview(bool preview) +{ + m_preview = preview; + if (preview) + m_queue.clear(); + m_indexQueue = -1; +} diff --git a/appl/recform/CppParser.hpp b/appl/recform/CppParser.hpp new file mode 100644 index 0000000..a215937 --- /dev/null +++ b/appl/recform/CppParser.hpp @@ -0,0 +1,78 @@ +/* + * CppParser.hpp + * + * (Un)License: Public Domain + * You can use and modify this file without any restriction. + * Do what you want. + * No warranties and disclaimer of any damages. + * More info: http://unlicense.org + * The latest sources: https://github.com/republib + */ + +#ifndef CPPPARSER_HPP +#define CPPPARSER_HPP + +enum CppSymbol { + CPP_UNDEF, + CPP_ClassIntro, + CPP_ClassBody, + CPP_FuncIntro, + CPP_FuncBody, +}; + +/** + * A lexical token analyser for C++ syntax. + */ +class CppLexer : public ReLexer{ +public: + CppLexer(ReSource* source); +public: + virtual ReToken* nextToken(); + void setPreview(bool preview); +private: + bool m_preview; + QVector m_queue; + int m_indexQueue; +}; + +class CFormatter; +/** + * A parser for C++. 
+ */ +class CppParser : public ReParser +{ +public: + enum BodyType { + BT_PROG, BT_CLASS, BT_METHOD, BT_BLOCK + }; + +public: + CppParser(CppLexer& lexer, CFormatter& formatter); +public: + void blank(ReToken* token); + void body(BodyType type); + void classHeader(ReTokenType type); + void comment(ReToken* token); + void declaration(CppKeyword keyword); + void exprStatement(ReToken* token); + void parse(); + void nonStatement(ReToken* token); + void statement(ReToken* token); +protected: + void addToken(ReToken* token); + bool isDeclarationIntro(CppKeyword keyword); + bool isAssignment(CppOperator op); + bool isStatement(CppKeyword keyword); +private: + CFormatter& m_formatter; + bool m_newlinePreceds; +public: + static const char* m_keywords; + static const char* m_operators; + static const char* m_rightAssociatives; + static const char* m_comments; + static const char* m_firstCharsId; + static const char* m_restCharsId; +}; + +#endif // CPPPARSER_HPP diff --git a/appl/recform/cuReCFormatter.cpp b/appl/recform/cuReCFormatter.cpp new file mode 100644 index 0000000..836a9bc --- /dev/null +++ b/appl/recform/cuReCFormatter.cpp @@ -0,0 +1,95 @@ +/* + * cuReCFormatter.cpp + * + * (Un)License: Public Domain + * You can use and modify this file without any restriction. + * Do what you want. + * No warranties and disclaimer of any damages. + * More info: http://unlicense.org + * The latest sources: https://github.com/republib + */ +#include "recformmain.hpp" +/** + * @brief Unit test of CppFormatter. 
+ */ +class TestCppFormatter: public ReTest { +public: + TestCppFormatter() : + ReTest("CppFormatter"), + m_args(""), + m_source(), + m_reader(m_source), + m_writer(), + m_formatter(m_args, &m_logger), + m_lexer(&m_source), + m_parser(m_lexer, m_formatter) + { + m_reader.addSource("std", ""); + m_source.addReader(&m_reader); + m_formatter.setWriter(&m_writer); + doIt(); + } + +public: + void setTokens(const char* input){ + m_reader.replaceSource("std", input); + ReToken* token; + do { + token = m_lexer.nextNonSpaceToken(); + m_formatter.addToken(token); + } while (! token->isTokenType(TOKEN_END_OF_SOURCE)); + } + + void testBasic() { + const char* line = "const char* ptr = (const char*) abc(1 + 2) * 3 / 4;"; + setTokens(line); + m_writer.buffer().clear(); + m_formatter.setLastDeclToken(4); + m_formatter.flush(true); + checkEqu(line, m_writer.buffer()); + } + void testLexer(){ + m_reader.replaceSource("std", "const int x = 3;"); + CppLexer lexer(&m_source); + lexer.setPreview(true); + ReToken* token; + do { + token = lexer.nextToken(); + } while (token->tokenType() != TOKEN_END_OF_SOURCE); + lexer.setPreview(false); + token = lexer.nextToken(); + checkEqu(TOKEN_KEYWORD, token->tokenType()); + checkEqu(K_CONST, token->id()); + + token = lexer.nextNonSpaceToken(); + checkEqu(TOKEN_KEYWORD, token->tokenType()); + checkEqu(K_INT, token->id()); + token = lexer.nextNonSpaceToken(); + checkEqu(TOKEN_ID, token->tokenType()); + checkEqu("x", token->toString()); + token = lexer.nextNonSpaceToken(); + checkT(token->isOperator(OP_ASSIGN)); + token = lexer.nextNonSpaceToken(); + int value; + checkT(token->isInteger(&value)); + checkEqu(3, value); + } + + virtual void runTests() { + testBasic(); + testLexer(); + + } + ReProgramArgs m_args; + ReSource m_source; + ReStringReader m_reader; + ReStringWriter m_writer; + CFormatter m_formatter; + CppLexer m_lexer; + CppParser m_parser; +}; + +void testCppFormatter() { + TestCppFormatter test; +} + diff --git a/appl/recform/recform.pro 
b/appl/recform/recform.pro index 321298e..726a233 100644 --- a/appl/recform/recform.pro +++ b/appl/recform/recform.pro @@ -16,21 +16,27 @@ INCLUDEPATH = ../.. SOURCES += recformmain.cpp \ ../../base/ReException.cpp \ + ../../base/ReByteStorage.cpp \ ../../base/ReStringUtils.cpp \ ../../base/ReQStringUtils.cpp \ ../../base/ReFileUtils.cpp \ ../../base/ReProgramArgs.cpp \ ../../base/ReCharPtrMap.cpp \ + ../../base/ReWriter.cpp \ ../../base/ReLogger.cpp \ + ../../base/ReTest.cpp \ ../../expr/ReSource.cpp \ ../../expr/ReLexer.cpp \ ../../expr/ReParser.cpp \ CFormatter.cpp \ - CppParser.cpp + CppParser.cpp \ + cuReCFormatter.cpp HEADERS += recformmain.hpp \ ../../base/rebase.hpp \ + ../../expr/reexpr.hpp \ CFormatter.hpp \ - cppparser.hpp + CppParser.hpp \ + CppDefinitions.hpp CODECFORSRC = UTF-8 diff --git a/appl/recform/recformmain.cpp b/appl/recform/recformmain.cpp new file mode 100644 index 0000000..6b58655 --- /dev/null +++ b/appl/recform/recformmain.cpp @@ -0,0 +1,41 @@ +/* + * recformmain.cpp + * + * (Un)License: Public Domain + * You can use and modify this file without any restriction. + * Do what you want. + * No warranties and disclaimer of any damages. 
+ * More info: http://unlicense.org + * The latest sources: https://github.com/republib + */ + +#include +#include "recformmain.hpp" + +void test(){ + +} + +int main(int argc, char *argv[]) +{ + QCoreApplication a(argc, argv); + + ReProgramArgs args("$0 [ ...]\n" + "Formats c++ sources", "$0 -v"); + args.addBool("verbose", "logs additional information", 'v', "verbose", false); + args.init(argc, argv); + if (true || args.argCount() == 1 && strcmp(args.arg(0), "test") == 0){ + extern void testCppFormatter(); + testCppFormatter(); + return 0; + } else { + ReLogger logger; + logger.buildStandardAppender("recform"); + CFormatter formatter(args, &logger); + ReSource source; + CppLexer lexer(&source); + CppParser parser(lexer, formatter); + formatter.formatAll(); + return a.exec(); + } +} diff --git a/appl/recform/recformmain.hpp b/appl/recform/recformmain.hpp new file mode 100644 index 0000000..bdcbe98 --- /dev/null +++ b/appl/recform/recformmain.hpp @@ -0,0 +1,18 @@ +/* + * recformmain.hpp + * + * (Un)License: Public Domain + * You can use and modify this file without any restriction. + * Do what you want. + * No warranties and disclaimer of any damages. + * More info: http://unlicense.org + * The latest sources: https://github.com/republib + */ +#ifndef RECFORMMAIN_HPP +#define RECFORMMAIN_HPP +#include "base/rebase.hpp" +#include "expr/reexpr.hpp" +#include "CppDefinitions.hpp" +#include "CFormatter.hpp" +#include "CppParser.hpp" +#endif diff --git a/base/ReWriter.cpp b/base/ReWriter.cpp index f163bb7..1fdeac8 100644 --- a/base/ReWriter.cpp +++ b/base/ReWriter.cpp @@ -22,7 +22,7 @@ #include "base/rebase.hpp" const char* ReWriter::m_tabs = - "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"; + "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"; int ReWriter::m_maxIndention = strlen(ReWriter::m_tabs); /** @class ReWriter ReWriter.hpp "base/ReWriter.hpp" @@ -145,11 +145,11 @@ void ReWriter::formatIndented(int indent, const char* format, ...) 
{ * @param eoln line end: "\n" or "\r\n" */ ReFileWriter::ReFileWriter(const char* filename, const char* mode, - FILE* additionalStream, const char* eoln) : - m_fp(fopen(filename, mode)), - m_name(filename), - m_eoln(eoln), - m_additionalStream(additionalStream) { + FILE* additionalStream, const char* eoln) : + m_fp(fopen(filename, mode)), + m_name(filename), + m_eoln(eoln), + m_additionalStream(additionalStream) { } /** @@ -190,3 +190,56 @@ void ReFileWriter::close() { } m_additionalStream = NULL; } + +/** + * Constructor. + */ +ReStringWriter::ReStringWriter() : + m_buffer() +{ +} + +/** + * Destructor. + */ +ReStringWriter::~ReStringWriter() +{ + +} + +/** + * Returns a reference to the internal buffer. + * + * @return the internal buffer + */ +QByteArray& ReStringWriter::buffer() +{ + return m_buffer; +} +/** + * Closes the writer. + */ +void ReStringWriter::close() +{ + // nothing to do +} + +/** + * Writes a string into the buffer. + * + * @param content string to write + */ +void ReStringWriter::write(const char* content) +{ + m_buffer.append(content); +} + +/** + * Writes a line and a newline char into the buffer. + * + * @param line the line to write (without trailing newline) + */ +void ReStringWriter::writeLine(const char* line) +{ + m_buffer.append(line).append('\n'); +} diff --git a/base/ReWriter.hpp b/base/ReWriter.hpp index 49d0f71..337c35d 100644 --- a/base/ReWriter.hpp +++ b/base/ReWriter.hpp @@ -41,10 +41,13 @@ protected: static int m_maxIndention; }; +/** + * Writes into a file. + */ class ReFileWriter: public ReWriter { public: ReFileWriter(const char* filename, const char* mode = "w", - FILE* additionalStream = NULL, const char* eoln = "\n"); + FILE* additionalStream = NULL, const char* eoln = "\n"); public: virtual void write(const char* line); virtual void writeLine(const char* line = NULL); @@ -56,4 +59,20 @@ protected: FILE* m_additionalStream; }; +/** + * Allows writing into a string buffer. 
+ */ +class ReStringWriter : public ReWriter { +public: + ReStringWriter(); + virtual ~ReStringWriter(); +public: + QByteArray& buffer(); + virtual void close(); + virtual void write(const char* content); + virtual void writeLine(const char* line = NULL); +protected: + QByteArray m_buffer; +}; + #endif // REWRITER_HPP diff --git a/cunit/cuReLexer.cpp b/cunit/cuReLexer.cpp index 3f330c6..19c73d4 100644 --- a/cunit/cuReLexer.cpp +++ b/cunit/cuReLexer.cpp @@ -52,7 +52,7 @@ public: checkEqu(0.25, asReal()); } - ReToken* checkToken(ReToken* token, RplTokenType type, int id = 0, + ReToken* checkToken(ReToken* token, ReTokenType type, int id = 0, const char* string = NULL) { checkEqu(type, token->tokenType()); if (id != 0) diff --git a/cunit/cuReQStringUtils.cpp b/cunit/cuReQStringUtils.cpp index 34e1bd0..f32ae4b 100644 --- a/cunit/cuReQStringUtils.cpp +++ b/cunit/cuReQStringUtils.cpp @@ -33,7 +33,7 @@ public: } void testLengthOfUInt64() { - quint64 value = -3; + uint64_t value = -3; checkEqu(1, ReQStringUtils::lengthOfUInt64(ReString("0"), 0, 10, &value)); checkEqu(int64_t(0), value); diff --git a/cunit/cunit.pro b/cunit/cunit.pro index 181449c..7cb6f61 100644 --- a/cunit/cunit.pro +++ b/cunit/cunit.pro @@ -7,6 +7,8 @@ QT += core network gui greaterThan(QT_MAJOR_VERSION, 4): QT += widgets +CONFIG += c++11 + TARGET = cunit CONFIG += console CONFIG -= app_bundle diff --git a/expr/ReLexer.cpp b/expr/ReLexer.cpp index 9ffa4aa..fa12991 100644 --- a/expr/ReLexer.cpp +++ b/expr/ReLexer.cpp @@ -50,15 +50,28 @@ ReLexException::ReLexException(const ReSourcePosition& position, * @brief Implements a token which is the smallest unit for a parser. * */ + /** * @brief Constructor. * @param type token type */ -ReToken::ReToken(RplTokenType type) : - m_tokenType(type), - m_string(), - m_printableString() -// m_value +ReToken::ReToken() : + m_tokenType(TOKEN_UNDEF), + m_string(), + m_printableString(), + m_value() +{ + memset(&m_value, 0, sizeof m_value); +} +/** + * @brief Constructor. 
+ * @param type token type + */ +ReToken::ReToken(ReTokenType type) : + m_tokenType(type), + m_string(), + m_printableString(), + m_value() { memset(&m_value, 0, sizeof m_value); } @@ -88,6 +101,7 @@ ReToken::ReToken(const ReToken& source) : ReToken& ReToken::operator =(const ReToken& source) { m_tokenType = source.m_tokenType; m_string = source.m_string; + m_printableString = source.m_printableString; m_value = source.m_value; return *this; } @@ -100,6 +114,14 @@ const QByteArray& ReToken::toString() { return m_string; } +/** + * @brief Returns the string representation of the instance + * @return a string representing the instance + */ +const QByteArray& ReToken::string() const{ + return m_string; +} + /** * @brief Returns the integer value of the token * @@ -159,7 +181,7 @@ int ReToken::id() const { * @brief Returns the token type. * @return the token type */ -RplTokenType ReToken::tokenType() const { +ReTokenType ReToken::tokenType() const { return m_tokenType; } @@ -171,7 +193,7 @@ RplTokenType ReToken::tokenType() const { * @return true: the expected type is the current
* false: otherwise */ -bool ReToken::isTokenType(RplTokenType expected) const { +bool ReToken::isTokenType(ReTokenType expected) const { return m_tokenType == expected; } @@ -226,6 +248,20 @@ bool ReToken::isCapitalizedId() const { return rc; } +/** + * Tests whether the token describes an integer. + * + * @param value OUT: NULL or the value of the integer. Only defined if result is true + * @return true: the token describes an integer + */ +bool ReToken::isInteger(int* value) +{ + bool rc = m_tokenType == TOKEN_NUMBER; + if (rc && value != NULL) + *value = m_value.m_integer; + return rc; +} + /** * @brief Returns the description of the current token. * @@ -277,7 +313,7 @@ QByteArray ReToken::asUtf8() const { * @param type the type to convert * @return the token type name */ -const char* ReToken::nameOfType(RplTokenType type) { +const char* ReToken::nameOfType(ReTokenType type) { const char* rc = "?"; switch (type) { @@ -332,6 +368,20 @@ const char* ReToken::nameOfType(RplTokenType type) { * */ +/** + * Stores items in a vector. + * + * Builds also a character table with flags for the 1st, 2nd, 3rd or rest + * characters of the items. 
+ * + * @param items a string with the items delimited by ' ' + * @param vector OUT: the vector with the items + * @param firstCharFlag a flag set for the first character of any item + * @param secondCharFlag a flag set for the second character of any item + * @param thirdCharFlag a flag set for the 3rd character of any item + * @param restCharFlag a flag set for the rest characters of any items + * @param charInfo OUT: info about all characters: bitmap of the 4 flags + */ static void itemsToVector(const char* items, ReLexer::StringList& vector, int firstCharFlag, int secondCharFlag, int thirdCharFlag, int restCharFlag, int charInfo[]) { @@ -423,6 +473,7 @@ ReLexer::ReLexer(ReSource* source, const char* keywords, const char* operators, int stringFeatures, int storageFlags) : m_source(source), m_keywords(), + m_keywordNames(), m_operators(), m_commentStarts(), m_commentEnds(), @@ -443,6 +494,7 @@ ReLexer::ReLexer(ReSource* source, const char* keywords, const char* operators, m_input(), m_currentCol(0), m_hasMoreInput(false), + m_storeAll(false), m_stringFeatures(stringFeatures), m_storageFlags(storageFlags), // m_prioOfOp @@ -455,6 +507,7 @@ ReLexer::ReLexer(ReSource* source, const char* keywords, const char* operators, memset(m_assocOfOp, 0, sizeof m_assocOfOp); memset(m_charInfo, 0, sizeof m_charInfo); + m_keywordNames = QByteArray(keywords).split(' '); itemsToVector(keywords, m_keywords, CC_FIRST_KEYWORD, CC_2nd_KEYWORD, CC_3rd_KEYWORD, CC_REST_KEYWORD, m_charInfo); prepareOperators(operators, rightAssociatives); @@ -541,7 +594,13 @@ void ReLexer::prepareOperators(const char* operators, start = end + 1; } } - +/** + * Initializes the comments. + * + * @param comments a string with comment pairs delimited by ' '.
+ * Note: Use '\n' as the 2nd item (the end marker) of the pair for a single-line comment.
+ * Example: "/+ +/ // \n" + */ void ReLexer::initializeComments(const char* comments) { if (comments != NULL) { QByteArray starters; @@ -579,8 +638,8 @@ void ReLexer::initializeComments(const char* comments) { * @param tokenLength the length of the prefix in m_input * @param vector the vector to search. Each element contains the id * as last entry - * @param id the id of the entry in the vector. Only set if found - * @return + * @return 0: not found
+ * otherwise: the id of the token */ int ReLexer::findInVector(int tokenLength, const StringList& vector) { int id = 0; @@ -654,7 +713,7 @@ bool ReLexer::fillInput() { * @return NULL: not found
* otherwise: the token */ -ReToken* ReLexer::findTokenWithId(RplTokenType tokenType, int flag2, +ReToken* ReLexer::findTokenWithId(ReTokenType tokenType, int flag2, StringList& names) { int length = 1; int inputLength = m_input.size(); @@ -763,6 +822,8 @@ ReToken* ReLexer::scanNumber() { length = realLength; } } + if (m_storeAll) + m_currentToken->m_string.append(m_input.constData(), length); m_input.remove(0, length); m_currentCol += length; return m_currentToken; @@ -889,6 +950,11 @@ void ReLexer::scanComment() { m_input.remove(0, length); m_currentCol += length; } + +void ReLexer::setStoreAll(bool storeAll) +{ + m_storeAll = storeAll; +} #if defined (RPL_LEXER_TRACE) bool ReLexer::trace() const { @@ -986,6 +1052,8 @@ ReToken* ReLexer::nextToken() { rc = m_currentToken; rc->m_tokenType = TOKEN_OPERATOR; rc->m_value.m_id = findInVector(1, m_operators); + if (m_storeAll) + rc->m_string.append(m_input.constData(), 1); m_input.remove(0, 1); m_currentCol += 1; } @@ -1006,7 +1074,6 @@ ReToken* ReLexer::nextToken() { m_input.remove(0, length); m_currentCol += length; } - } } } @@ -1100,6 +1167,19 @@ size_t ReLexer::maxTokenLength() const { return m_maxTokenLength; } +/** + * Returns the keyword to a given keyword id. + * + * @param keyword the keyword id + * @return "": not found
+ * otherwise: the keyword + */ +const QByteArray&ReLexer::nameOfKeyword(int keyword) const +{ + return keyword > 0 && keyword <= m_keywordNames.size() + ? m_keywordNames[keyword - 1] : ReStringUtils::m_empty; +} + /** * @brief Sets the maximal length of a token * @@ -1115,10 +1195,10 @@ void ReLexer::setMaxTokenLength(size_t maxTokenLength) { */ ReToken* ReLexer::nextNonSpaceToken() { ReToken* rc = NULL; - RplTokenType type; + ReTokenType type; do { rc = nextToken(); - } while ((type = m_currentToken->tokenType()) == TOKEN_SPACE + } while ((type = rc->tokenType()) == TOKEN_SPACE || type == TOKEN_COMMENT_START || type == TOKEN_COMMENT_END || type == TOKEN_COMMENT_REST_OF_LINE); return rc; @@ -1137,6 +1217,26 @@ ReToken* ReLexer::nextNonSpaceToken() { void ReLexer::startUnit(ReSourceUnitName unit) { m_source->startUnit(unit, *m_currentPosition); } + +/** + * Returns the text of the token. + * + * Note: the token does not store the text of operators and keywords. + * + * @param token token to append + * @param buffer OUT: the buffer to append + * @return buffer (for chaining) + */ +QByteArray& ReLexer::textOfToken(ReToken* token, QByteArray& buffer) +{ + if (token->tokenType() == TOKEN_OPERATOR) + buffer.append(nameOfOp(token->id())); + else if (token->tokenType() == TOKEN_KEYWORD) + buffer.append(nameOfKeyword(token->id())); + else + buffer.append(token->string()); + return buffer; +} /** * @brief Returns the source of the instance. 
* diff --git a/expr/ReLexer.hpp b/expr/ReLexer.hpp index e081021..fb4c0e4 100644 --- a/expr/ReLexer.hpp +++ b/expr/ReLexer.hpp @@ -14,7 +14,7 @@ //#define RPL_LEXER_TRACE -enum RplTokenType { +enum ReTokenType { TOKEN_UNDEF, TOKEN_STRING, TOKEN_NUMBER, @@ -39,30 +39,33 @@ class ReLexer; class ReToken { public: - ReToken(RplTokenType type); + ReToken(); + ReToken(ReTokenType type); ~ReToken(); ReToken(const ReToken& source); ReToken& operator =(const ReToken& source); public: friend class ReLexer; - const QByteArray& toString(); - bool isInteger(); int asInteger() const; - quint64 asUInt64() const; qreal asReal() const; - const QByteArray& rawString() const; - int id() const; - RplTokenType tokenType() const; - bool isTokenType(RplTokenType expected) const; - bool isOperator(int expected, int alternative = 0) const; - bool isKeyword(int expected, int alternative = 0) const; + quint64 asUInt64() const; + QByteArray asUtf8() const; void clear(); - bool isCapitalizedId() const; QByteArray dump() const; - static const char* nameOfType(RplTokenType type); - QByteArray asUtf8() const; + int id() const; + bool isCapitalizedId() const; + bool isInteger(int* value = NULL); + bool isKeyword(int expected, int alternative = 0) const; + bool isTokenType(ReTokenType expected) const; + bool isOperator(int expected, int alternative = 0) const; + const QByteArray& rawString() const; + const QByteArray& string() const; + ReTokenType tokenType() const; + const QByteArray& toString(); +public: + static const char* nameOfType(ReTokenType type); protected: - RplTokenType m_tokenType; + ReTokenType m_tokenType; QByteArray m_string; // only for TOKEN_STRING: copy from source but with escaped chars like "\\n" QByteArray m_printableString; @@ -86,7 +89,7 @@ public: NUMTYPE_FLOAT = 1 << 3, /// NUMTYPE_ALL_INTEGER = NUMTYPE_DECIMAL | NUMTYPE_OCTAL - | NUMTYPE_HEXADECIMAL, + | NUMTYPE_HEXADECIMAL, NUMTYPE_ALL = NUMTYPE_ALL_INTEGER | NUMTYPE_FLOAT }; enum CharClassTag { @@ -143,7 +146,7 @@ 
public: SF_DOUBLE_DELIM = 1 << 6, // Redefinitions for better reading: SF_LIKE_C = SF_TICK | SF_QUOTE | SF_C_ESCAPING | SF_C_SPECIAL - | SF_C_HEX_CHARS + | SF_C_HEX_CHARS }; enum StorageFlags { S_UNDEF, @@ -161,41 +164,45 @@ public: public: ReLexer(ReSource* source, const char* keywords, const char* operators, - const char* rightAssociatives, const char* comments, - const char* firstCharsId = "a-zA-Z_", const char* restCharsId = - "a-zA-Z0-9_", - int numericTypes = NUMTYPE_DECIMAL | NUMTYPE_HEXADECIMAL - | NUMTYPE_FLOAT, - int stringFeatures = SF_TICK | SF_QUOTE | SF_C_ESCAPING | SF_C_SPECIAL - | SF_C_HEX_CHARS, int storageFlags = STORE_NOTHING); + const char* rightAssociatives, const char* comments, + const char* firstCharsId = "a-zA-Z_", const char* restCharsId = + "a-zA-Z0-9_", + int numericTypes = NUMTYPE_DECIMAL | NUMTYPE_HEXADECIMAL + | NUMTYPE_FLOAT, + int stringFeatures = SF_TICK | SF_QUOTE | SF_C_ESCAPING | SF_C_SPECIAL + | SF_C_HEX_CHARS, int storageFlags = STORE_NOTHING); virtual ~ReLexer(); public: - ReToken* nextToken(); - void undoLastToken(); - void undoLastToken2(); - void saveLastToken(); - ReToken* peekNonSpaceToken(); - ReToken* nextNonSpaceToken(); + const ReSourcePosition* currentPosition() const; + ReToken* currentToken() const; + bool isRightAssociative(int op) const; size_t maxTokenLength() const; + const QByteArray& nameOfKeyword(int keyword) const; + const QByteArray& nameOfOp(int op) const; + virtual ReToken* nextToken(); + ReToken* nextNonSpaceToken(); + ReToken* peekNonSpaceToken(); + int prioOfOp(int op) const; + void saveLastToken(); void setMaxTokenLength(size_t maxTokenLength); - void startUnit(ReSourceUnitName unit); + void setStoreAll(bool storeAll); ReSource* source(); - int prioOfOp(int op) const; - const QByteArray& nameOfOp(int op) const; - bool isRightAssociative(int op) const; - const ReSourcePosition* currentPosition() const; - ReToken* currentToken() const; + void startUnit(ReSourceUnitName unit); + QByteArray& 
textOfToken(ReToken* token, QByteArray& buffer); + void undoLastToken(); + void undoLastToken2(); #if defined RPL_LEXER_TRACE bool trace() const; void setTrace(bool trace); #endif + private: - void prepareOperators(const char* operators, const char* rightAssociatives); - void initializeComments(const char* comments); bool fillInput(); int findInVector(int tokenLength, const StringList& vector); - ReToken* findTokenWithId(RplTokenType tokenType, int flag2, - StringList& names); + ReToken* findTokenWithId(ReTokenType tokenType, int flag2, + StringList& names); + void initializeComments(const char* comments); + void prepareOperators(const char* operators, const char* rightAssociatives); ReToken* scanNumber(); ReToken* scanString(); void scanComment(); @@ -203,6 +210,8 @@ protected: ReSource* m_source; /// sorted, string ends with the id of the keyword StringList m_keywords; + // index is operator id: + StringList m_keywordNames; // sorted, string ends with the id of the operator StringList m_operators; // sorted, each entry ends with the id of the comment start @@ -230,6 +239,7 @@ protected: QByteArray m_input; int m_currentCol; bool m_hasMoreInput; + bool m_storeAll; int m_stringFeatures; int m_storageFlags; /// priority of the operators: index: id of the operator. content: prio diff --git a/expr/ReParser.cpp b/expr/ReParser.cpp index a8f1ddb..437747b 100644 --- a/expr/ReParser.cpp +++ b/expr/ReParser.cpp @@ -39,7 +39,7 @@ * @return */ ReSyntaxError::ReSyntaxError(const char* reason) : - m_reason(reason) { + m_reason(reason) { } /** * @brief Returns the description of the exception. @@ -63,8 +63,8 @@ const char* ReSyntaxError::reason() const { * @brief Constructor. 
* @param reason the reason of the exception */ -RplParserStop::RplParserStop(const char* reason) : - ReSyntaxError(reason) { +ReParserStop::ReParserStop(const char* reason) : + ReSyntaxError(reason) { } /** @class ReParser ReParser.hpp "expr/ReParser.hpp" @@ -77,16 +77,14 @@ RplParserStop::RplParserStop(const char* reason) : * @brief Constructor. * * @param lexer the tokenizer - * @param tree the abstract syntax tree - */ -ReParser::ReParser(ReLexer& lexer, ReASTree& tree) : - m_lexer(lexer), - m_tree(tree), - m_messages(), - m_errors(0), - m_warnings(0), - m_maxErrors(20), - m_maxWarnings(20) { + */ +ReParser::ReParser(ReLexer& lexer) : + m_lexer(lexer), + m_messages(), + m_errors(0), + m_warnings(0), + m_maxErrors(20), + m_maxWarnings(20) { } /** @@ -99,11 +97,11 @@ ReParser::ReParser(ReLexer& lexer, ReASTree& tree) : * @return false (for chaining) */ bool ReParser::addSimpleMessage(LevelTag prefix, int location, - const ReSourcePosition* position, const char* message) { + const ReSourcePosition* position, const char* message) { char buffer[2048]; QByteArray msg; qsnprintf(buffer, sizeof buffer, "%c%04d %s:%d-%d: ", prefix, location, - position->sourceUnit()->name(), position->lineNo(), position->column()); + position->sourceUnit()->name(), position->lineNo(), position->column()); int used = strlen(buffer); int length = strlen(message); if (length >= (int) sizeof buffer - used) @@ -125,7 +123,7 @@ bool ReParser::addSimpleMessage(LevelTag prefix, int location, * @return false (for chaining) */ bool ReParser::addMessage(LevelTag prefix, int location, - const ReSourcePosition* position, const char* format, va_list varList) { + const ReSourcePosition* position, const char* format, va_list varList) { char buffer[2048]; qvsnprintf(buffer, sizeof buffer, format, varList); return addSimpleMessage(prefix, location, position, buffer); @@ -161,12 +159,12 @@ void ReParser::syntaxError(int location, const char* message) { */ void ReParser::syntaxError(int location, const 
char* message, - const char* symbol, const ReSourcePosition* position) { + const char* symbol, const ReSourcePosition* position) { char buffer[256]; char buffer2[512]; qsnprintf(buffer2, sizeof buffer2, - "The starting symbol %s is located here. Missing point: %s", symbol, - m_lexer.currentPosition()->utf8(buffer, sizeof buffer)); + "The starting symbol %s is located here. Missing point: %s", symbol, + m_lexer.currentPosition()->utf8(buffer, sizeof buffer)); addSimpleMessage(LT_ERROR, location, m_lexer.currentPosition(), message); addSimpleMessage(LT_INFO, location + 1, position, buffer2); @@ -189,7 +187,7 @@ bool ReParser::error(int location, const char* format, ...) { addMessage(LT_ERROR, location, m_lexer.currentPosition(), format, ap); va_end(ap); if (++m_errors >= m_maxErrors) - throw RplParserStop("too many errors"); + throw ReParserStop("too many errors"); return false; } /** @@ -205,11 +203,11 @@ bool ReParser::error(int location, const char* format, ...) { * @return false (for chaining) */ bool ReParser::error(int location, const ReSourcePosition* position, - const char* message, const char* message2) { + const char* message, const char* message2) { addSimpleMessage(LT_ERROR, location, m_lexer.currentPosition(), message); addSimpleMessage(LT_INFO, location + 1, position, message2); if (++m_errors >= m_maxErrors) - throw RplParserStop("too many errors"); + throw ReParserStop("too many errors"); return false; } @@ -228,7 +226,7 @@ void ReParser::warning(int location, const char* format, ...) { addMessage(LT_WARNING, location, m_lexer.currentPosition(), format, ap); va_end(ap); if (++m_warnings >= m_maxWarnings) - throw RplParserStop("too many warnings"); + throw ReParserStop("too many warnings"); } /** * @brief Return the number of errors. 
diff --git a/expr/ReParser.hpp b/expr/ReParser.hpp index ebdd406..d265034 100644 --- a/expr/ReParser.hpp +++ b/expr/ReParser.hpp @@ -21,9 +21,9 @@ private: const char* m_reason; }; -class RplParserStop: public ReSyntaxError { +class ReParserStop: public ReSyntaxError { public: - RplParserStop(const char* reason); + ReParserStop(const char* reason); }; class ReParser { @@ -37,24 +37,24 @@ public: public: typedef QList MessageList; public: - ReParser(ReLexer& lexer, ReASTree& ast); + ReParser(ReLexer& lexer); public: bool addSimpleMessage(LevelTag prefix, int location, - const ReSourcePosition* pos, const char* message); + const ReSourcePosition* pos, const char* message); bool addMessage(LevelTag prefix, int location, const ReSourcePosition* pos, - const char* format, va_list varList); + const char* format, va_list varList); void syntaxError(int location, const char* message); void syntaxError(int location, const char* message, const char* symbol, - const ReSourcePosition* position); + const ReSourcePosition* position); bool error(int location, const char* format, ...); bool error(int location, const ReSourcePosition* position, - const char* message, const char* message2); + const char* message, const char* message2); void warning(int location, const char* format, ...); int errors() const; int warnings() const; protected: ReLexer& m_lexer; - ReASTree& m_tree; + ReASTree* m_tree; MessageList m_messages; int m_errors; int m_warnings; diff --git a/remodules.hpp b/remodules.hpp index 37ad075..1633b4b 100644 --- a/remodules.hpp +++ b/remodules.hpp @@ -35,6 +35,10 @@ enum { LOC_FILESYSTEM, LOC_RANDOMIZER, LOC_CRYPTFILESYSTEM, + + // Applications: + LOC_RECFORM_CPPPARSER = 201, + LOC_RECFORM_CPPFORMATTER = 202, }; #define LOC_FIRST_OF(moduleNo) (moduleNo*100+1) class RplModules {