/////////////////////////////////////////////////////////////////////////////// // // Pascal Parser Grammar for Spirit (http://spirit.sourceforge.net/) // // Adapted from, // Pascal User Manual And Report (Second Edition-1978) // Kathleen Jensen - Niklaus Wirth // // Written by: Hakki Dogusan dogusanh@tr.net // Adapted by Joel de Guzman djowel@gmx.co.uk // ported to Spirit v1.5 [ JDG 9/16/2002 ] // /////////////////////////////////////////////////////////////////////////////// //#define SPIRIT_DEBUG ///$$$ DEFINE THIS WHEN DEBUGGING $$$/// #include <boost/spirit/core.hpp> #include <boost/spirit/symbols/symbols.hpp> #include <fstream> #include <iostream> #include <vector> /////////////////////////////////////////////////////////////////////////////// using namespace std; using namespace spirit; /////////////////////////////////////////////////////////////////////////////// // // Our Pascal grammar // /////////////////////////////////////////////////////////////////////////////// struct pascal_grammar : public grammar<pascal_grammar> { pascal_grammar() {} template <typename ScannerT> struct definition { definition(pascal_grammar const& /*self*/) { #ifdef SPIRIT_DEBUG debug(); // define the debug names #endif //----------------------------------------------------------------- // KEYWORDS //----------------------------------------------------------------- keywords = "in", "div", "mod", "and", "or", "not", "nil", "goto", "if", "then", "else", "case", "while", "repeat", "until", "for", "do", "to", "downto", "with", "program", "label", "const", "type", "char", "boolean", "integer", "real", "packed", "array", "of", "record", "end", "set", "file", "var", "procedure", "function", "begin"; //----------------------------------------------------------------- // OPERATORS //----------------------------------------------------------------- chlit<> PLUS('+'); chlit<> MINUS('-'); chlit<> STAR('*'); chlit<> SLASH('/'); strlit<> ASSIGN(":="); chlit<> COMMA(','); chlit<> SEMI(';'); chlit<> COLON(':'); chlit<> EQUAL('='); strlit<> NOT_EQUAL("<>"); chlit<> LT('<'); strlit<> LE("<="); strlit<> GE(">="); chlit<> GT('>'); chlit<> LPAREN('('); chlit<> RPAREN(')'); chlit<> LBRACK('['); chlit<> RBRACK(']'); chlit<> POINTER('^'); chlit<> DOT('.'); strlit<> DOTDOT(".."); //----------------------------------------------------------------- // TOKENS //----------------------------------------------------------------- typedef inhibit_case<strlit<> > token_t; token_t IN_ = nocase_d["in"]; token_t DIV = nocase_d["div"]; token_t MOD = nocase_d["mod"]; token_t AND = nocase_d["and"]; token_t OR = nocase_d["or"]; token_t NOT = nocase_d["not"]; token_t NIL = nocase_d["nil"]; token_t GOTO = nocase_d["goto"]; token_t IF = nocase_d["if"]; token_t THEN = nocase_d["then"]; token_t ELSE = nocase_d["else"]; token_t CASE = nocase_d["case"]; token_t WHILE = nocase_d["while"]; token_t REPEAT = nocase_d["repeat"]; token_t UNTIL = nocase_d["until"]; token_t FOR = nocase_d["for"]; token_t DO = nocase_d["do"]; token_t TO = nocase_d["to"]; token_t DOWNTO = nocase_d["downto"]; token_t WITH = nocase_d["with"]; token_t PROGRAM = nocase_d["program"]; token_t LABEL = nocase_d["label"]; token_t CONST_ = nocase_d["const"]; token_t TYPE = nocase_d["type"]; token_t CHAR = nocase_d["char"]; token_t BOOLEAN = nocase_d["boolean"]; token_t INTEGER = nocase_d["integer"]; token_t REAL = nocase_d["real"]; token_t PACKED_ = nocase_d["packed"]; token_t ARRAY = nocase_d["array"]; token_t OF = nocase_d["of"]; token_t RECORD = nocase_d["record"]; token_t END = nocase_d["end"]; token_t SET = nocase_d["set"]; token_t FILE = nocase_d["file"]; token_t VAR = nocase_d["var"]; token_t PROCEDURE = nocase_d["procedure"]; token_t FUNCTION = nocase_d["function"]; token_t BEGIN = nocase_d["begin"]; //----------------------------------------------------------------- // Start grammar definition //----------------------------------------------------------------- identifier = nocase_d[ lexeme_d[ (alpha_p >> *(alnum_p | '_')) - (keywords >> anychar_p - (alnum_p | '_')) ] ]; string_literal = lexeme_d[ chlit<>('\'') >> +( strlit<>("\'\'") | anychar_p-chlit<>('\'') ) >> chlit<>('\'') ]; program = programHeading >> block >> DOT ; programHeading = PROGRAM >> identifier >> LPAREN >> fileIdentifier >> *( COMMA >> fileIdentifier ) >> RPAREN >> SEMI ; fileIdentifier = identifier.alias() ; block = *( labelDeclarationPart | constantDefinitionPart | typeDefinitionPart | variableDeclarationPart | procedureAndFunctionDeclarationPart ) >> statementPart ; labelDeclarationPart = LABEL >> label >> *( COMMA >> label ) >> SEMI ; label = unsignedInteger.alias() ; constantDefinitionPart = CONST_ >> constantDefinition >> *( SEMI >> constantDefinition ) >> SEMI ; constantDefinition = identifier >> EQUAL >> constant ; constant = unsignedNumber | sign >> unsignedNumber | constantIdentifier | sign >> constantIdentifier | string_literal ; unsignedNumber = lexeme_d[uint_p >> !('.' >> uint_p) >> !(nocase_d['e'] >> (ch_p('+') | '-') >> uint_p)] ; unsignedInteger = uint_p ; unsignedReal = ureal_p ; sign = PLUS | MINUS ; constantIdentifier = identifier.alias() ; typeDefinitionPart = TYPE >> typeDefinition >> *( SEMI >> typeDefinition ) >> SEMI ; typeDefinition = identifier >> EQUAL >> type ; type = simpleType | structuredType | pointerType ; simpleType = scalarType | subrangeType | typeIdentifier ; scalarType = LPAREN >> identifier >> *( COMMA >> identifier ) >> RPAREN ; subrangeType = constant >> DOTDOT >> constant ; typeIdentifier = identifier | CHAR | BOOLEAN | INTEGER | REAL ; structuredType = ( PACKED_ | empty ) >> unpackedStructuredType ; unpackedStructuredType = arrayType | recordType | setType | fileType ; arrayType = ARRAY >> LBRACK >> indexType >> *( COMMA >> indexType ) >> RBRACK >> OF >> componentType ; indexType = simpleType.alias() ; componentType = type.alias() ; recordType = RECORD >> fieldList >> END ; fieldList = fixedPart >> ( SEMI >> variantPart | empty ) | variantPart ; fixedPart = recordSection >> *( SEMI >> recordSection ) ; recordSection = fieldIdentifier >> *( COMMA >> fieldIdentifier ) >> COLON >> type | empty ; variantPart = CASE >> tagField >> typeIdentifier >> OF >> variant >> *( SEMI >> variant ) ; tagField = fieldIdentifier >> COLON | empty ; variant = caseLabelList >> COLON >> LPAREN >> fieldList >> RPAREN | empty ; caseLabelList = caseLabel >> *( COMMA >> caseLabel ) ; caseLabel = constant.alias() ; setType = SET >> OF >> baseType ; baseType = simpleType.alias() ; fileType = FILE >> OF >> type ; pointerType = POINTER >> typeIdentifier ; variableDeclarationPart = VAR >> variableDeclaration >> *( SEMI >> variableDeclaration ) >> SEMI ; variableDeclaration = identifier >> *( COMMA >> identifier ) >> COLON >> type ; procedureAndFunctionDeclarationPart = procedureOrFunctionDeclaration >> SEMI ; procedureOrFunctionDeclaration = procedureDeclaration | functionDeclaration ; procedureDeclaration = procedureHeading >> block ; procedureHeading = PROCEDURE >> identifier >> parameterList >> SEMI ; parameterList = empty | LPAREN >> formalParameterSection >> *( SEMI >> formalParameterSection ) >> RPAREN ; formalParameterSection = parameterGroup | VAR >> parameterGroup | FUNCTION >> parameterGroup | PROCEDURE >> identifier >> *( COMMA >> identifier ) ; parameterGroup = identifier >> *( COMMA >> identifier ) >> COLON >> typeIdentifier ; functionDeclaration = functionHeading >> block ; functionHeading = FUNCTION >> identifier >> parameterList >> COLON >> resultType >> SEMI ; resultType = typeIdentifier.alias() ; statementPart = compoundStatement.alias() ; statement = ( label >> COLON | empty ) >> unlabelledStatement ; unlabelledStatement = structuredStatement | simpleStatement ; simpleStatement = assignmentStatement | procedureStatement | gotoStatement | emptyStatement ; assignmentStatement = variable >> ASSIGN >> expression | functionIdentifier >> ASSIGN >> expression ; variable = componentVariable | referencedVariable | entireVariable ; entireVariable = variableIdentifier.alias() ; variableIdentifier = identifier.alias() ; componentVariable = indexedVariable | fieldDesignator | fileBuffer ; indexedVariable = arrayVariable >> LBRACK >> expression >> *( COMMA >> expression) >> RBRACK ; arrayVariable = identifier.alias() ; fieldDesignator = recordVariable >> DOT >> fieldIdentifier ; recordVariable = identifier.alias() ; fieldIdentifier = identifier.alias() ; fileBuffer = fileVariable >> POINTER ; fileVariable = identifier.alias() ; referencedVariable = pointerVariable >> POINTER ; pointerVariable = identifier.alias() ; expression = simpleExpression >> ( relationalOperator >> simpleExpression | empty ) ; relationalOperator = EQUAL | NOT_EQUAL | GE | LE | LT | GT | IN_ ; simpleExpression = ( sign | empty ) >> term >> *( addingOperator >> term ) ; addingOperator = PLUS | MINUS | OR ; term = factor >> *( multiplyingOperator >> factor ) ; multiplyingOperator = STAR | SLASH | DIV | MOD | AND ; factor = LPAREN >> expression >> RPAREN | set | longest_d[ variable | unsignedConstant | functionDesignator ] | NOT >> factor ; unsignedConstant = unsignedNumber | string_literal | constantIdentifier | NIL ; functionDesignator = functionIdentifier >> ( LPAREN >> actualParameter >> *( COMMA >> actualParameter ) >> RPAREN | empty ) ; functionIdentifier = identifier.alias() ; set = LBRACK >> elementList >> RBRACK ; elementList = element >> *( COMMA >> element ) | empty ; element = expression >> ( DOTDOT >> expression | empty ) ; procedureStatement = procedureIdentifier >> ( LPAREN >> actualParameter >> *( COMMA >> actualParameter ) >> RPAREN | empty ) ; procedureIdentifier = identifier.alias() ; actualParameter = expression | variable | procedureIdentifier | functionIdentifier ; gotoStatement = GOTO >> label ; emptyStatement = empty.alias() ; empty = epsilon_p ; structuredStatement = compoundStatement | conditionalStatement | repetetiveStatement | withStatement ; compoundStatement = BEGIN >> statement >> *( SEMI >> statement ) >> END ; conditionalStatement = ifStatement | caseStatement ; ifStatement = IF >> expression >> THEN >> statement >> ( ELSE >> statement | empty ) ; caseStatement = CASE >> expression >> OF >> caseListElement >> *( SEMI >> caseListElement ) >> END ; caseListElement = caseLabelList >> COLON >> statement | empty ; repetetiveStatement = whileStatement | repeatStatement | forStatement ; whileStatement = WHILE >> expression >> DO >> statement ; repeatStatement = REPEAT >> statement >> *( SEMI >> statement ) >> UNTIL >> expression ; forStatement = FOR >> controlVariable >> ASSIGN >> forList >> DO >> statement ; forList = initialValue >> ( TO | DOWNTO ) >> finalValue ; controlVariable = identifier.alias() ; initialValue = expression.alias() ; finalValue = expression.alias() ; withStatement = WITH >> recordVariableList >> DO >> statement ; recordVariableList = recordVariable >> *( COMMA >> recordVariable ) ; //----------------------------------------------------------------- // End grammar definition //----------------------------------------------------------------- } #ifdef SPIRIT_DEBUG void debug() { SPIRIT_DEBUG_RULE(program); SPIRIT_DEBUG_RULE(programHeading); SPIRIT_DEBUG_RULE(fileIdentifier); SPIRIT_DEBUG_RULE(identifier); SPIRIT_DEBUG_RULE(block); SPIRIT_DEBUG_RULE(labelDeclarationPart); SPIRIT_DEBUG_RULE(label); SPIRIT_DEBUG_RULE(constantDefinitionPart); SPIRIT_DEBUG_RULE(constantDefinition); SPIRIT_DEBUG_RULE(constant); SPIRIT_DEBUG_RULE(unsignedNumber); SPIRIT_DEBUG_RULE(unsignedInteger); SPIRIT_DEBUG_RULE(unsignedReal); SPIRIT_DEBUG_RULE(sign); SPIRIT_DEBUG_RULE(constantIdentifier); SPIRIT_DEBUG_RULE(string_literal); SPIRIT_DEBUG_RULE(typeDefinitionPart); SPIRIT_DEBUG_RULE(typeDefinition); SPIRIT_DEBUG_RULE(type); SPIRIT_DEBUG_RULE(simpleType); SPIRIT_DEBUG_RULE(scalarType); SPIRIT_DEBUG_RULE(subrangeType); SPIRIT_DEBUG_RULE(typeIdentifier); SPIRIT_DEBUG_RULE(structuredType); SPIRIT_DEBUG_RULE(unpackedStructuredType); SPIRIT_DEBUG_RULE(arrayType); SPIRIT_DEBUG_RULE(indexType); SPIRIT_DEBUG_RULE(componentType); SPIRIT_DEBUG_RULE(recordType); SPIRIT_DEBUG_RULE(fieldList); SPIRIT_DEBUG_RULE(fixedPart); SPIRIT_DEBUG_RULE(recordSection); SPIRIT_DEBUG_RULE(variantPart); SPIRIT_DEBUG_RULE(tagField); SPIRIT_DEBUG_RULE(variant); SPIRIT_DEBUG_RULE(caseLabelList); SPIRIT_DEBUG_RULE(caseLabel); SPIRIT_DEBUG_RULE(setType); SPIRIT_DEBUG_RULE(baseType); SPIRIT_DEBUG_RULE(fileType); SPIRIT_DEBUG_RULE(pointerType); SPIRIT_DEBUG_RULE(variableDeclarationPart); SPIRIT_DEBUG_RULE(variableDeclaration); SPIRIT_DEBUG_RULE(procedureAndFunctionDeclarationPart); SPIRIT_DEBUG_RULE(procedureOrFunctionDeclaration); SPIRIT_DEBUG_RULE(procedureDeclaration); SPIRIT_DEBUG_RULE(procedureHeading); SPIRIT_DEBUG_RULE(parameterList); SPIRIT_DEBUG_RULE(formalParameterSection); SPIRIT_DEBUG_RULE(parameterGroup); SPIRIT_DEBUG_RULE(functionDeclaration); SPIRIT_DEBUG_RULE(functionHeading); SPIRIT_DEBUG_RULE(resultType); SPIRIT_DEBUG_RULE(statementPart); SPIRIT_DEBUG_RULE(statement); SPIRIT_DEBUG_RULE(unlabelledStatement); SPIRIT_DEBUG_RULE(simpleStatement); SPIRIT_DEBUG_RULE(assignmentStatement); SPIRIT_DEBUG_RULE(variable); SPIRIT_DEBUG_RULE(entireVariable); SPIRIT_DEBUG_RULE(variableIdentifier); SPIRIT_DEBUG_RULE(componentVariable); SPIRIT_DEBUG_RULE(indexedVariable); SPIRIT_DEBUG_RULE(arrayVariable); SPIRIT_DEBUG_RULE(fieldDesignator); SPIRIT_DEBUG_RULE(recordVariable); SPIRIT_DEBUG_RULE(fieldIdentifier); SPIRIT_DEBUG_RULE(fileBuffer); SPIRIT_DEBUG_RULE(fileVariable); SPIRIT_DEBUG_RULE(referencedVariable); SPIRIT_DEBUG_RULE(pointerVariable); SPIRIT_DEBUG_RULE(expression); SPIRIT_DEBUG_RULE(relationalOperator); SPIRIT_DEBUG_RULE(simpleExpression); SPIRIT_DEBUG_RULE(addingOperator); SPIRIT_DEBUG_RULE(term); SPIRIT_DEBUG_RULE(multiplyingOperator); SPIRIT_DEBUG_RULE(factor); SPIRIT_DEBUG_RULE(unsignedConstant); SPIRIT_DEBUG_RULE(functionDesignator); SPIRIT_DEBUG_RULE(functionIdentifier); SPIRIT_DEBUG_RULE(set); SPIRIT_DEBUG_RULE(elementList); SPIRIT_DEBUG_RULE(element); SPIRIT_DEBUG_RULE(procedureStatement); SPIRIT_DEBUG_RULE(procedureIdentifier); SPIRIT_DEBUG_RULE(actualParameter); SPIRIT_DEBUG_RULE(gotoStatement); SPIRIT_DEBUG_RULE(emptyStatement); SPIRIT_DEBUG_RULE(empty); SPIRIT_DEBUG_RULE(structuredStatement); SPIRIT_DEBUG_RULE(compoundStatement); SPIRIT_DEBUG_RULE(conditionalStatement); SPIRIT_DEBUG_RULE(ifStatement); SPIRIT_DEBUG_RULE(caseStatement); SPIRIT_DEBUG_RULE(caseListElement); SPIRIT_DEBUG_RULE(repetetiveStatement); SPIRIT_DEBUG_RULE(whileStatement); SPIRIT_DEBUG_RULE(repeatStatement); SPIRIT_DEBUG_RULE(forStatement); SPIRIT_DEBUG_RULE(forList); SPIRIT_DEBUG_RULE(controlVariable); SPIRIT_DEBUG_RULE(initialValue); SPIRIT_DEBUG_RULE(finalValue); SPIRIT_DEBUG_RULE(withStatement); SPIRIT_DEBUG_RULE(recordVariableList); } #endif rule<ScannerT> const& start() const { return program; } symbols<> keywords; rule<ScannerT> string_literal, program, programHeading, fileIdentifier, identifier, block, labelDeclarationPart, label, constantDefinitionPart, constantDefinition, constant, unsignedNumber, unsignedInteger, unsignedReal, sign, constantIdentifier, typeDefinitionPart, typeDefinition, type, simpleType, scalarType, subrangeType, typeIdentifier, structuredType, unpackedStructuredType, arrayType, indexType, componentType, recordType, fieldList, fixedPart, recordSection, variantPart, tagField, variant, caseLabelList, caseLabel, setType, baseType, fileType, pointerType, variableDeclarationPart, variableDeclaration, procedureAndFunctionDeclarationPart, procedureOrFunctionDeclaration, procedureDeclaration, procedureHeading, parameterList, formalParameterSection, parameterGroup, functionDeclaration, functionHeading, resultType, statementPart, statement, unlabelledStatement, simpleStatement, assignmentStatement, variable, entireVariable, variableIdentifier, componentVariable, indexedVariable, arrayVariable, fieldDesignator, recordVariable, fieldIdentifier, fileBuffer, fileVariable, referencedVariable, pointerVariable, expression, relationalOperator, simpleExpression, addingOperator, term, multiplyingOperator, factor, unsignedConstant, functionDesignator, functionIdentifier, set, elementList, element, procedureStatement, procedureIdentifier, actualParameter, gotoStatement, emptyStatement, empty, structuredStatement, compoundStatement, conditionalStatement, ifStatement, caseStatement, caseListElement, repetetiveStatement, whileStatement, repeatStatement, forStatement, forList, controlVariable, initialValue, finalValue, withStatement, recordVariableList; }; }; /////////////////////////////////////////////////////////////////////////////// // // The Pascal White Space Skipper // /////////////////////////////////////////////////////////////////////////////// struct pascal_skipper : public grammar<pascal_skipper> { pascal_skipper() {} template <typename ScannerT> struct definition { definition(pascal_skipper const& /*self*/) { skip = space_p | '{' >> (*(anychar_p - '}')) >> '}' // pascal comment 1 | "(*" >> (*(anychar_p - "*)")) >> "*)" // pascal comment 2 ; #ifdef SPIRIT_DEBUG SPIRIT_DEBUG_RULE(skip); #endif } rule<ScannerT> skip; rule<ScannerT> const& start() const { return skip; } }; }; /////////////////////////////////////////////////////////////////////////////// // // Parse a file // /////////////////////////////////////////////////////////////////////////////// static void parse(char const* filename) { ifstream in(filename); if (!in) { cerr << "Could not open input file: " << filename << endl; return; } in.unsetf(ios::skipws); // Turn of white space skipping on the stream vector<char> vec; std::copy( istream_iterator<char>(in), istream_iterator<char>(), std::back_inserter(vec)); vector<char>::const_iterator first = vec.begin(); vector<char>::const_iterator last = vec.end(); pascal_skipper skip_p; pascal_grammar p; #ifdef SPIRIT_DEBUG SPIRIT_DEBUG_NODE(skip_p); SPIRIT_DEBUG_NODE(p); #endif parse_info<vector<char>::const_iterator> info = parse(first, last, p, skip_p); if (info.full) { cout << "\t\t" << filename << " Parses OK\n\n\n"; } else { cerr << "---PARSING FAILURE---\n"; cerr << string(info.stop, last); } } /////////////////////////////////////////////////////////////////////////////// // // Main program // /////////////////////////////////////////////////////////////////////////////// int main(int argc, char* argv[]) { cout << "/////////////////////////////////////////////////////////\n\n"; cout << "\t\tPascal Grammar For Spirit...\n\n"; cout << "/////////////////////////////////////////////////////////\n\n"; if (argc > 1) { for (int i = 1; i < argc; ++i) { cout << argv[i] << endl; parse(argv[i]); } } else { cerr << "---NO FILENAME GIVEN---" << endl; } return 0; }