gitweb.hamatoma.de Git - reqt/commitdiff
day's work
author hama <hama@siduction.net>
Mon, 30 Jun 2014 23:21:26 +0000 (01:21 +0200)
committer hama <hama@siduction.net>
Mon, 30 Jun 2014 23:21:26 +0000 (01:21 +0200)
rplexpr/rpllexer.cpp
rplexpr/rpllexer.hpp
rplexpr/rplmfparser.cpp
rplexpr/rplmfparser.hpp
unittests/rpllexer_test.cpp

index 89c3dbfdea48036b7a239dcde79f683ba072f5e5..051451e0a4782c90e92856f2a21b08ac5c77101d 100644 (file)
@@ -278,7 +278,9 @@ static void charClassToCharInfo(const char* charClass, int flag,
  *                      S_ORG_STRINGS | S_COMMENTS | S_BLANKS
  */
 RplLexer::RplLexer(RplSource* source,
-        const char* keywords, const char* operators, const char* comments,
+        const char* keywords,
+        const char* operators, const char* rightAssociatives,
+        const char* comments,
         const char* firstCharsId, const char* restCharsId, int numericTypes,
         int stringFeatures, int storageFlags) :
     m_source(source),
@@ -312,7 +314,7 @@ RplLexer::RplLexer(RplSource* source,
     memset(m_charInfo, 0, sizeof m_charInfo);
     itemsToVector(keywords, m_keywords, CC_FIRST_KEYWORD, CC_2nd_KEYWORD,
                   CC_3rd_KEYWORD, CC_REST_KEYWORD, m_charInfo);
-    prepareOperators(operators);
+    prepareOperators(operators, rightAssociatives);
     charClassToCharInfo(firstCharsId, CC_FIRST_ID, m_charInfo);
     charClassToCharInfo(restCharsId, CC_REST_ID, m_charInfo);
     initializeComments(comments);
@@ -349,8 +351,11 @@ int countBlanks(const char* start, const char* end){
  *                      '\n' separates the operators with the same priority.
  *                      Lower position means lower priority
  */
-void RplLexer::prepareOperators(const char* operators){
+void RplLexer::prepareOperators(const char* operators,
+                                const char* rightAssociatives){
     QByteArray op2(operators);
+    QByteArray rightAssociatives2(" ");
+    rightAssociatives2 += rightAssociatives;
     op2.replace("\n", " ");
     itemsToVector(op2.constData(), m_operators, CC_FIRST_OP, CC_2nd_OP,
                   CC_3rd_OP, CC_REST_OP, m_charInfo);
@@ -364,6 +369,16 @@ void RplLexer::prepareOperators(const char* operators){
             int id = m_operators[ix].at(2).unicode();
             m_charInfo[id] |= CC_OP_1_ONLY;
         }
+
+    }
+    m_opNames.reserve(m_operators.size() + 1);
+    op2 = " " + op2;
+    m_opNames = op2.split(' ');
+    QByteArray rAssoc = QByteArray(" ") + rightAssociatives + " ";
+    for (int opId = m_opNames.size() - 1; opId >= 1; opId--){
+        QByteArray item = " " + m_opNames[opId] + " ";
+        if (rAssoc.indexOf(item) >= 0)
+            m_assocOfOp[opId] = true;
     }
     const char* start = operators;
     const char* end;
index f761affa63d1aefc03a1949b4b8a8dfac7070ea2..5536aad9c4cf00e9ff16d44fd6609406ac1fd3d1 100644 (file)
@@ -152,7 +152,9 @@ public:
 
 public:
     RplLexer(RplSource* source,
-        const char* keywords, const char* operators,
+        const char* keywords,
+        const char* operators,
+        const char* rightAssociatives,
         const char* comments,
         const char* firstCharsId = "a-zA-Z_",
         const char* restCharsId = "a-zA-Z0-9_",
@@ -171,10 +173,12 @@ public:
     void startUnit(const QString& unit);
     RplSource* source();
     int prioOfOp(int op) const;
+    const QByteArray& nameOfOp(int op) const;
+    bool isRightAssociative(int op) const;
     RplSourcePosition* currentPosition() const;
 
 private:
-    void prepareOperators(const char* operators);
+    void prepareOperators(const char* operators, const char* rightAssociatives);
     void initializeComments(const char* comments);
     bool fillInput();
     int findInVector(int tokenLength, const StringList& vector);
@@ -218,6 +222,8 @@ protected:
     int m_storageFlags;
     /// priority of the operators: index: id of the operator. content: prio
     char m_prioOfOp[128];
+    char m_assocOfOp[128];
+    QList<QByteArray> m_opNames;
 };
 
 
index 2776249c809b4fd14376faf5e31a8fe726743219..e88a2dbde89b86ff025d6cc8843f68c95a1ed00a 100644 (file)
 enum MFLocations{
     L_PARSE_OPERAND_RPARENTH = 2000,
     L_PARSE_OPERAND_RPARENTH_FUNC,
-    L_PARSE_OPERAND_WRONG
+    L_PARSE_OPERAND_WRONG,
+    L_TERM_NO_OP,
+    L_TERM_NO_OP2,
+    L_TERM_WRONG_KEYWORD,
+    L_TERM_WRONG_ID
 
 };
 
@@ -29,7 +33,7 @@ enum MFLocations{
 RplMFParser::RplMFParser(RplSource& source, RplASTree& abstractSyntaxTree) :
     RplParser(m_lexer, abstractSyntaxTree),
     m_lexer(&source,
-            MF_KEYWORDS, MF_OPERATORS,
+            MF_KEYWORDS, MF_OPERATORS, MF_RIGHT_ASSOCIATIVES,
             "/* */ // \n",
             "a-zA-Z_", "a-zA-Z0-9_",
             RplLexer::NUMTYPE_ALL, RplLexer::SF_LIKE_C)
@@ -208,26 +212,47 @@ RplASItem* RplMFParser::parseTerm(int depth){
         item = parseOperand(depth);
         token = m_lexer.nextNonSpaceToken();
         RplTokenType tokenType = token->tokenType();
+        int tokenId;
         switch(tokenType){
         case TOKEN_OPERATOR:
         {
             if (IS_BINARY_OP(tokenType)){
                 RplASBinaryOp* op = new RplASBinaryOp();
-                op->setOperator(token->id());
+                int opId = token->id();
+                op->setOperator(opId);
                 int prio = m_lexer.prioOfOp(token->id());
-                op->setChild(item);
-                op->setPosition(m_lexer.currentPosition());
-                op->setChild2(parseOperand(depth));
-
-            }
+                if (prio < lastPrio
+                        || prio == lastPrio && m_lexer.isRightAssociative(opId)){
+                    op->setChild(item);
+                    op->setPosition(m_lexer.currentPosition());
+                    op->setChild2(parseOperand(depth));
+                } else {
+                    op->setChild(item);
+                    op->setPosition(m_lexer.currentPosition());
+                    op->setChild2(parseOperand(depth));
+                }
+            } else if ( (tokenId = token->id()) == O_RPARENT
+                        || IS_OP_BEHIND_EXPR(tokenId))
+                again = false;
+            else
+                syntaxError(L_TERM_NO_OP, "Operator expected");
             break;
         }
         case TOKEN_STRING:
         case TOKEN_NUMBER:
         case TOKEN_REAL:
+            syntaxError(L_TERM_NO_OP2, "Operator expected");
             break;
         case TOKEN_KEYWORD:
+            tokenId = token->id();
+            if (IS_KEYWORD_BEHIND_EXPR(tokenId))
+                again = false;
+            else
+                syntaxError(L_TERM_WRONG_KEYWORD, "unexpected keyword found");
+            break;
         case TOKEN_ID:
+            syntaxError(L_TERM_WRONG_ID, "unexpected id found");
+            break;
         case TOKEN_END_OF_SOURCE:
             again = false;
             break;
index 8ebe9b3e3246c4c8ed00a6d3b36d2c32acf0916f..f17d6e2cd832863bc286e4db677fdab64aef9f63 100644 (file)
@@ -16,13 +16,15 @@ public:
     enum Keyword {
         K_UNDEF, K_IF, K_THEN, K_ELSE, K_FI, K_WHILE, K_DO, K_OD, K_REPEAT, K_UNTIL,
         K_FOR, K_FROM, K_TO, K_STEP, K_CASE, K_OF, K_ESAC, K_LEAVE, K_CONTINUE, K_PASS,
-        K_CLASS, K_END, K_FUNCTION, K_GENERATOR, K_IMPORT,
+        K_CLASS, K_ENDC, K_ENDF, K_FUNCTION, K_GENERATOR, K_IMPORT,
         K_CONST, K_LAZY, K_INT, K_FLOAT, K_BOOL, K_NONE, K_TRUE, K_FALSE
          };
+#define IS_KEYWORD_BEHIND_EXPR(o) (o==K_THEN||o==K_DO||o==K_FROM||o==K_OF \
+    ||o==K_ENDF||o==K_ENDC)
 #define MF_KEYWORDS "if then else fi while do od repeat until" \
     " for from to step case of esac leave continue pass" \
-    " class end function generator import" \
-    " const lazy int float bool None True False"
+    " class endc endf func generator import" \
+    " const lazy None True False"
     enum Operator {
         O_UNDEF, O_SEMI_SEMICOLON, O_SEMICOLON, O_COMMA, O_COLON,
         O_ASSIGN, O_PLUS_ASSIGN, O_MINUS_ASSIGN, O_DIV_ASSIGN, O_TIMES_ASSIGN,
@@ -40,10 +42,12 @@ public:
         O_DOT,
         O_NOT, O_BIT_NOT,
         O_INC, O_DEC,
-        O_LPARENTH, O_RPARENT, O_LBRACKET, O_RBRACKET
+        O_LPARENTH, O_RPARENT, O_LBRACKET, O_RBRACKET, O_LBRACE, O_RBRACE
     };
 #define IS_BINARY_OP(op) ((op) >= O_ASSIGN && op <= O_DOT)
 #define IS_UNARY_OP(op) (op==O_PLUS || op==O_MINUS || (op>=O_NOT && op<=O_DEC))
+#define IS_OP_BEHIND_EXPR(o) (o==O_RBRACKET||o==O_RBRACE||o==O_SEMICOLON \
+    ||o==O_COMMA||o==O_SEMI_SEMICOLON||o==O_QUESTION)
 /// \n separates the priority classes
 #define MF_OPERATORS ";; ; , :\n" \
     "= += -= /= *= %= **= |= &= <<= >>= >>>=\n" \
@@ -60,7 +64,8 @@ public:
     ".\n" \
     "! ~\n" \
     "++ --\n" \
-    ". ( ) [ ]"
+    ". ( ) [ ] { }"
+#define MF_RIGHT_ASSOCIATIVES "= += -= /= *= %= **= |= &= <<= >>= >>>= ** ."
 public:
     RplMFParser(RplSource& source, RplASTree& ast);
 public:
index 93998c590c562aec30fe76fac8e6e86e8cee9fb6..78780b608b355271934ec4a3f779eff2c173e249 100644 (file)
@@ -73,7 +73,7 @@ public:
 #       define BLANKS2 " \n"
         reader.addSource("<main>", BLANKS1 BLANKS2);
         source.addReader(&reader);
-        RplLexer lex(&source, KEYWORDS, OPERATORS, COMMENTS,
+        RplLexer lex(&source, KEYWORDS, OPERATORS, "=", COMMENTS,
                  "A-Za-z_",
                  "A-Za-z0-9_",
                  RplLexer::NUMTYPE_DECIMAL,
@@ -87,7 +87,7 @@ public:
         const char* blanks = "321 0x73 7.8e+5";
         reader.addSource("<main>", blanks);
         source.addReader(&reader);
-        RplLexer lex(&source, KEYWORDS, OPERATORS, COMMENTS,
+        RplLexer lex(&source, KEYWORDS, OPERATORS, "=", COMMENTS,
                  "A-Za-z_",
                  "A-Za-z0-9_",
                  RplLexer::NUMTYPE_ALL,
@@ -108,7 +108,7 @@ public:
         source.addReader(&reader);
         enum { UNDEF, SHIFT, LT, SHIFT2, LE, EQ, ASSIGN,
                LPARENT, RPARENT, LBRACKET, RBRACKET };
-        RplLexer lex(&source, KEYWORDS, ops, COMMENTS,
+        RplLexer lex(&source, KEYWORDS, ops, "=", COMMENTS,
                  "A-Za-z_",
                  "A-Za-z0-9_",
                  RplLexer::NUMTYPE_ALL,
@@ -142,7 +142,7 @@ public:
 
         enum { COMMENT_UNDEF, COMMENT_MULTILINE, COMMENT_1
         };
-        RplLexer lex(&source, KEYWORDS, OPERATORS, COMMENTS,
+        RplLexer lex(&source, KEYWORDS, OPERATORS, "=", COMMENTS,
                  "A-Za-z_",
                  "A-Za-z0-9_",
                  RplLexer::NUMTYPE_ALL,
@@ -168,7 +168,7 @@ public:
         reader.addSource("<main>", "\"abc\\t\\r\\n\\a\\v\"'1\\x9Z\\x21A\\X9'");
         source.addReader(&reader);
 
-        RplLexer lex(&source, KEYWORDS, OPERATORS, COMMENTS,
+        RplLexer lex(&source, KEYWORDS, OPERATORS, "=", COMMENTS,
                  "A-Za-z_",
                  "A-Za-z0-9_",
                  RplLexer::NUMTYPE_ALL,
@@ -183,7 +183,7 @@ public:
         reader.addSource("<main>", "if\n\tthen else\nfi");
         source.addReader(&reader);
 
-        RplLexer lex(&source, KEYWORDS, OPERATORS, COMMENTS,
+        RplLexer lex(&source, KEYWORDS, OPERATORS, "=", COMMENTS,
                  "A-Za-z_",
                  "A-Za-z0-9_",
                  RplLexer::NUMTYPE_ALL,
@@ -203,7 +203,7 @@ public:
             "_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789");
         source.addReader(&reader);
 
-        RplLexer lex(&source, KEYWORDS, OPERATORS, COMMENTS,
+        RplLexer lex(&source, KEYWORDS, OPERATORS, "=", COMMENTS,
                  "A-Za-z_",
                  "A-Za-z0-9_",
                  RplLexer::NUMTYPE_ALL,
@@ -220,7 +220,7 @@ public:
         RplStringReader reader(source);
         source.addReader(&reader);
         reader.addSource("<main>", "if i>1 then i=1+2*_x9 fi");
-        RplLexer lex(&source, KEYWORDS, OPERATORS, COMMENTS,
+        RplLexer lex(&source, KEYWORDS, OPERATORS, "=", COMMENTS,
                  "A-Za-z_",
                  "A-Za-z0-9_",
                  RplLexer::NUMTYPE_ALL,
@@ -245,7 +245,7 @@ public:
         enum { O_UNDEF, O_ASSIGN, O_PLUS, O_MINUS, O_TIMES, O_DIV
              };
         RplLexer lex(&source, KEYWORDS,
-                 "=\n+ -\n* /",
+                     "=\n+ -\n* /", "=",
                  COMMENTS,
                  "A-Za-z_",
                  "A-Za-z0-9_",