From a902dec4fd4b94dddba0f534bb5e2cfd1742d7ba Mon Sep 17 00:00:00 2001 From: Mario Ceresa <mrceresa@gmail.com> Date: Thu, 18 Oct 2012 16:15:06 +0200 Subject: [PATCH 04/15] Converted to lib+standalone program layout Signed-off-by: Mario Ceresa <mrceresa@gmail.com> --- CMakeLists.txt | 4 +- expatpp.cpp | 799 ----------------------------------------------------- expatpp.h | 339 ----------------------- lib/CMakeLists.txt | 3 + lib/expatpp.cpp | 799 +++++++++++++++++++++++++++++++++++++++++++++++++++++ lib/expatpp.h | 339 +++++++++++++++++++++++ 6 files changed, 1143 insertions(+), 1140 deletions(-) delete mode 100755 expatpp.cpp delete mode 100755 expatpp.h create mode 100644 lib/CMakeLists.txt create mode 100755 lib/expatpp.cpp create mode 100755 lib/expatpp.h diff --git a/CMakeLists.txt b/CMakeLists.txt index e3bdd8f..8db8529 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,5 +2,5 @@ project(expatpp) find_package(EXPAT REQUIRED) -add_library(expatpp SHARED expatpp.cpp) -target_link_libraries(expatpp ${EXPAT_LIBRARIES}) \ No newline at end of file +add_dir(lib) +add_dir(test) \ No newline at end of file diff --git a/expatpp.cpp b/expatpp.cpp deleted file mode 100755 index 8a7c532..0000000 --- a/expatpp.cpp +++ /dev/null @@ -1,799 +0,0 @@ -// expatpp -#ifdef UNDER_CE - #include <string.h> - #include <windows.h> - #include <dbgapi.h> - #define assert ASSERT -#else - #include <string> - using namespace std; - #include <assert.h> -#endif -#include "expatpp.h" - - -// may be defined in xmltchar.h or elsewhere -#ifndef tcscmp - #ifdef XML_UNICODE - #define tcscmp wcscmp - #else - #define tcscmp strcmp - #endif // XML_UNICODE -#endif // tcscmp - - -#ifndef BUFSIZ - #define BUFSIZ 4096 -#endif - -#include <stdio.h> -#include <string.h> - -expatpp::expatpp(bool createParser) : - mParser(0), // in case of exception below - mHaveParsed(false) -{ - if (createParser) { - // subclasses may call this ctor after parser created! 
- mParser = XML_ParserCreate(0); - SetupHandlers(); - } -} - - -void -expatpp::SetupHandlers() -{ - ::XML_SetUserData(mParser, this); - ::XML_SetElementHandler(mParser, startElementCallback, endElementCallback); - ::XML_SetCharacterDataHandler(mParser, charDataCallback); - ::XML_SetProcessingInstructionHandler(mParser, processingInstructionCallback); - ::XML_SetDefaultHandler(mParser, defaultHandlerCallback); - ::XML_SetUnparsedEntityDeclHandler(mParser, unParsedEntityDeclCallback); - ::XML_SetNotationDeclHandler(mParser, notationDeclCallback); - ::XML_SetNotStandaloneHandler(mParser, notStandaloneHandlerCallback); - ::XML_SetNamespaceDeclHandler(mParser, startNamespaceCallback, endNamespaceCallback); -#ifndef EXPATPP_COMPATIBLE_EXPAT12 - ::XML_SetAttlistDeclHandler(mParser, attlistDeclCallback); - ::XML_SetCdataSectionHandler(mParser, startCdataSectionCallback, endCdataSectionCallback); - ::XML_SetCommentHandler(mParser, commentCallback); - ::XML_SetDoctypeDeclHandler(mParser, startDoctypeDeclCallback, endDoctypeDeclCallback); - ::XML_SetElementDeclHandler(mParser, elementDeclCallback); - ::XML_SetEntityDeclHandler(mParser, entityDeclCallback); - ::XML_SetSkippedEntityHandler(mParser, skippedEntityCallback); - ::XML_SetXmlDeclHandler(mParser, xmlDeclCallback); -#endif -} - - -expatpp::~expatpp() -{ - if (mParser) // allows subclasses to avoid finishing parsing - ReleaseParser(); -} - - -/** - Provide single point that will call XML_ParserFree. - Nothing else in this code should call XML_ParserFree! -*/ -void -expatpp::ReleaseParser() -{ - ::XML_ParserFree(mParser); - mParser = 0; -} - - - -/** - Provide single point that will call XML_ParserReset. - Guarded against trivial reset before use in case that breaks - expat or creates overhead. 
- - \todo pass in encoding to XML_ParserReset when we support encodings -*/ -void -expatpp::ResetParser() -{ -#ifdef EXPATPP_COMPATIBLE_EXPAT12 - assert(!"Reset not available in earlier than expat 1.95.3");s -#else - if (mHaveParsed) { - ::XML_ParserReset(mParser, NULL); - SetupHandlers(); - mHaveParsed = false; - } -#endif -} - - -/** - Parse entire file, basically copy of the loop from the elements.c example. -*/ -XML_Status -expatpp::parseFile(FILE* inFile) -{ - ResetParser(); - - char buf[BUFSIZ]; - int done; - if (!inFile) - return XML_STATUS_ERROR; - fseek(inFile, 0, SEEK_SET); // reset for reading - do { - size_t len = fread(buf, 1, sizeof(buf), inFile); - done = len < sizeof(buf); - enum XML_Status parseStatus; - if ((parseStatus = XML_Parse(buf, len, done))!=XML_STATUS_OK) { - return parseStatus; - } - } while (!done); - return XML_STATUS_OK; -} - - -XML_Status -expatpp::XML_Parse(const char *s, int len, int isFinal) -{ - mHaveParsed = true; - const XML_Status retStatus = ::XML_Parse(mParser, s, len, isFinal); - if (isFinal) - CheckFinalStatus(retStatus); - return retStatus; -} - - -XML_Error -expatpp::XML_GetErrorCode() -{ - return ::XML_GetErrorCode(mParser); -} - - -int -expatpp::XML_GetCurrentLineNumber() -{ - return ::XML_GetCurrentLineNumber(mParser); -} - - -int -expatpp::XML_GetCurrentColumnNumber() -{ - return ::XML_GetCurrentColumnNumber(mParser); -} - - - - -/** - Parse string which is assumed to be entire XML document. 
- Written to stop stupid errors of being off by one in the string length causing - wasted debugging time, such as: -\verbatim - const char[] kSampleSettings = "<settings/>"; - const int sampleSize = sizeof(kSampleSettings)-1; // unless you remember to subtract one here will get invalid token error - if (!parser.XML_Parse(kSampleSettings, sampleSize, 1)) { -\endverbatim -*/ -XML_Status -expatpp::parseString(const char* inString) -{ - ResetParser(); - const int inLen = strlen(inString); - return XML_Parse(inString, inLen, 1); -} - -void -expatpp::startElementCallback(void *userData, const XML_Char* name, const XML_Char** atts) -{ - ((expatpp*)userData)->startElement(name, atts); -} - - -void -expatpp::endElementCallback(void *userData, const XML_Char* name) -{ - ((expatpp*)userData)->endElement(name); -} - - -void -expatpp::startNamespaceCallback(void *userData, const XML_Char* prefix, const XML_Char* uri) -{ - ((expatpp*)userData)->startNamespace(prefix, uri); -} - - -void -expatpp::endNamespaceCallback(void *userData, const XML_Char* prefix) -{ - ((expatpp*)userData)->endNamespace(prefix); -} - - -void -expatpp::charDataCallback(void *userData, const XML_Char* s, int len) -{ - ((expatpp*)userData)->charData(s, len); -} - - -void -expatpp:: processingInstructionCallback(void *userData, const XML_Char* target, const XML_Char* data) -{ - ((expatpp*)userData)->processingInstruction(target, data); -} - - -void -expatpp::defaultHandlerCallback(void* userData, const XML_Char* s, int len) -{ - ((expatpp*)userData)->defaultHandler(s, len); -} - - -int -expatpp::notStandaloneHandlerCallback(void* userData) -{ - return ((expatpp*)userData)->notStandaloneHandler(); -} - - -void -expatpp::unParsedEntityDeclCallback(void* userData, const XML_Char* entityName, const XML_Char* base, const XML_Char* systemId, const XML_Char* publicId, const XML_Char* notationName) -{ - ((expatpp*)userData)->unparsedEntityDecl(entityName, base, systemId, publicId, notationName); -} - - -void 
-expatpp::notationDeclCallback(void *userData, const XML_Char* notationName, const XML_Char* base, const XML_Char* systemId, const XML_Char* publicId) -{ - ((expatpp*)userData)->notationDecl(notationName, base, systemId, publicId); -} - - -void -expatpp::startElement(const XML_Char*, const XML_Char**) -{} - - -void -expatpp::endElement(const XML_Char*) -{} - - -void -expatpp::startNamespace(const XML_Char* /* prefix */, const XML_Char* /* uri */) -{} - - -void -expatpp::endNamespace(const XML_Char*) -{} - - -void -expatpp::charData(const XML_Char*, int ) -{ -} - - -void -expatpp::processingInstruction(const XML_Char*, const XML_Char*) -{ -} - - -void -expatpp::defaultHandler(const XML_Char*, int) -{ -} - - -int -expatpp::notStandaloneHandler() -{ - return 0; -} - - -void -expatpp::unparsedEntityDecl(const XML_Char*, const XML_Char*, const XML_Char*, const XML_Char*, const XML_Char*) -{ -} - - -void -expatpp::notationDecl(const XML_Char*, const XML_Char*, const XML_Char*, const XML_Char*) -{ -} - - -int -expatpp::skipWhiteSpace(const XML_Char* startFrom) -{ - // use our own XML definition of white space - // TO DO - confirm this is correct! - const XML_Char* s = startFrom; - XML_Char c = *s; - while ((c==' ') || (c=='\t') || (c=='\n') || (c=='\r')) { - s++; - c = *s; - } - const int numSkipped = s - startFrom; - return numSkipped; -} - - -/** - Iterate the paired attribute name/value until find a pair with matching name. - \return pointer to the value or null if not found. 
-*/ -const XML_Char* -expatpp::getAttribute(const XML_Char* matchingName, const XML_Char** atts) -{ - for (int i=0; atts[i]; i++) { - const XML_Char* attributeName = atts[i++]; - assert(attributeName); // shouldn't fail this because of loop test above - if(tcscmp(attributeName, matchingName)==0) { - return atts[i]; // if 2nd item was missing, this returns 0 safely indicating failure - } - } - return 0; -} - - -/** -\bug will always return 0 for PPC -*/ -bool -expatpp::getIntegerAttribute(const XML_Char *matchingName, const XML_Char **atts, int& outAtt) -{ - const XML_Char* attStr = getAttribute(matchingName, atts); - if (!attStr) - return false; - int i=0; -#ifdef XML_UNICODE -fail to compile because need this now -#else - sscanf(attStr, "%d", &i); -#endif - outAtt = i; - return true; -} - - -/** -\bug will always return 0 for PPC -*/ -bool -expatpp::getDoubleAttribute(const XML_Char *matchingName, const XML_Char **atts, double& outAtt) -{ - const XML_Char* attStr = getAttribute(matchingName, atts); - if (!attStr) - return false; - float f = 0.0; // sscanf doesn't allow point to double -#ifdef XML_UNICODE -fail to compile because need this now -#else - sscanf(attStr, "%f", &f); -#endif - outAtt = f; - return true; -} - - -bool -expatpp::emptyCharData(const XML_Char *s, int len) -{ -// usually call from top of overriden charData methods - if (len==0) - return true; //*** early exit - empty string, may never occur?? 
- -// skip newline and empty whitespace - if ( - ((len==1) && ( (s[0]=='\n') || (s[0]=='\r')) ) || // just CR or just LF - ((len==2) && (s[0]=='\r') && (s[1]=='\n')) // DOS-style CRLF - ) - return true; //*** early exit - newline - - const int lastCharAt = len-1; - if (s[lastCharAt]==' ') { // maybe all whitespace - int i; - for (i=0; i<lastCharAt; i++) { - if (s[i]!=' ') - break; - } - if (i==lastCharAt) - return true; //*** early exit - all spaces - } - return false; -} - - -//-------- Added for expat 1.95.5--------------- -void -expatpp::attlistDeclCallback(void *userData, - const XML_Char *elname, - const XML_Char *attname, - const XML_Char *att_type, - const XML_Char *dflt, - int isrequired) -{ - ((expatpp*)userData)->attlistDecl(elname, attname, att_type, dflt, isrequired); -} - - -void -expatpp::commentCallback(void *userData, const XML_Char *data) -{ - ((expatpp*)userData)->comment(data); -} - - -void -expatpp::elementDeclCallback(void *userData, const XML_Char *name, XML_Content *model) -{ - ((expatpp*)userData)->elementDecl(name, model); -} - - -void -expatpp::endCdataSectionCallback(void *userData) -{ - ((expatpp*)userData)->endCdataSection(); -} - - -void -expatpp::endDoctypeDeclCallback(void *userData) -{ - ((expatpp*)userData)->endDoctypeDecl(); -} - - -void -expatpp::entityDeclCallback(void *userData, - const XML_Char *entityName, - int is_parameter_entity, - const XML_Char *value, - int value_length, - const XML_Char *base, - const XML_Char *systemId, - const XML_Char *publicId, - const XML_Char *notationName) -{ - ((expatpp*)userData)->entityDecl(entityName, is_parameter_entity, value, value_length, base, systemId, publicId, notationName); -} - - -void -expatpp::skippedEntityCallback(void *userData, const XML_Char *entityName, int is_parameter_entity) -{ - ((expatpp*)userData)->skippedEntity(entityName, is_parameter_entity); -} - - -void -expatpp::startCdataSectionCallback(void *userData) -{ - ((expatpp*)userData)->startCdataSection(); -} - - -void 
-expatpp::startDoctypeDeclCallback(void *userData, - const XML_Char *doctypeName, - const XML_Char *sysid, - const XML_Char *pubid, - int has_internal_subset) -{ - ((expatpp*)userData)->startDoctypeDecl(doctypeName, sysid, pubid, has_internal_subset); -} - - -void -expatpp::xmlDeclCallback(void *userData, const XML_Char *version, - const XML_Char *encoding, - int standalone) -{ - ((expatpp*)userData)->xmlDecl(version, encoding, standalone); -} - - -void -expatpp::attlistDecl( - const XML_Char *elname, - const XML_Char *attname, - const XML_Char *att_type, - const XML_Char *dflt, - int isrequired) -{ -} - - -void -expatpp::comment( const XML_Char *data) -{ -} - - -void -expatpp::elementDecl( const XML_Char *name, XML_Content *model) -{ -} - - -void -expatpp::endCdataSection() -{ -} - - -void -expatpp::endDoctypeDecl() -{ -} - - -void -expatpp::entityDecl( - const XML_Char *entityName, - int is_parameter_entity, - const XML_Char *value, - int value_length, - const XML_Char *base, - const XML_Char *systemId, - const XML_Char *publicId, - const XML_Char *notationName) -{ -} - - -void -expatpp::skippedEntity( const XML_Char *entityName, int is_parameter_entity) -{ -} - - -void -expatpp::startCdataSection() -{ -} - - -void -expatpp::startDoctypeDecl(const XML_Char *doctypeName, - const XML_Char *sysid, - const XML_Char *pubid, - int has_internal_subset) -{ -} - - -void -expatpp::xmlDecl( const XML_Char *version, - const XML_Char *encoding, - int standalone) -{ -} - - - - -// ------------------------------------------------------- -// e x p a t p p N e s t i n g -// ------------------------------------------------------- -/** - \param parent can be null in which case this is root parser - - \note The handlers set in here MUST be also set in SetupHandlers - which is a virtual method invoked by expatpp::ResetParser. 
Otherwise - you can have subtle bugs with a nested parser not properly returning - after reusing a parser (nasty and found rapidly only via extensive unit - tests and plentiful assertions!). - - \WARNING - The assumption that is not obvious here is that if you want to use - nested parsers, then your topmost parser must also be an expatppNesting - subclass, NOT an expatpp subclass, because we need the - nestedStartElementCallback and nestedEndElementCallback - callbacks to override those in the expatpp ctor. - - - - \todo go back over code in detail and confirm above warning still valid - I think if we used expat's functions to invoke the registered callback - might be safer - the explicit function call we have in nestedEndElementCallback - certainly assumes the parent type. -*/ -expatppNesting::expatppNesting(expatppNesting* parent) : - expatpp(parent==0), // don't create parser - we're taking over from parent if given - mDepth(0), - mParent(parent), - mOwnedChild(0), - mSelfDeleting(true) -{ - if ( parent ) - { - RegisterWithParentXMLParser(); - parent->AdoptChild(this); - } - else - { - // No parent - the expatpp constructor will have created a new mParser (expat parser) - ::XML_SetElementHandler(mParser, nestedStartElementCallback, nestedEndElementCallback); - } - assert(mParser); // either we created above or expatpp -} - - -expatppNesting::~expatppNesting() -{ - assert(!mParent); // if we are a sub-parser, should not delete without calling returnToParent - DeleteChild(); -} - - -/** - Call parent version then override same as in our ctor. -*/ -void -expatppNesting::SetupHandlers() -{ - expatpp::SetupHandlers(); - ::XML_SetElementHandler(mParser, nestedStartElementCallback, nestedEndElementCallback); -} - -/** - Must use if you have adopted a child parser and want to dispose of it early. -*/ -void -expatppNesting::DeleteChild() -{ - delete mOwnedChild; - mOwnedChild = 0; -} - - -/** - Invoked as a callback from a child ctor when we pass in a parent pointer. 
- OR used from switchToNewSubParser, in which case it may be the 2nd time - we're called for a given child (see scenarios in expatppNesting class comment). -*/ -void -expatppNesting::AdoptChild(expatppNesting* adoptingChild) -{ - if ( mOwnedChild != adoptingChild ) - { - delete mOwnedChild; - mOwnedChild = adoptingChild; - } -} - - -/** - to use parent's underlying expat parser -*/ -void -expatppNesting::RegisterWithParentXMLParser() -{ - mParser = mParent->mParser; - ::XML_SetUserData(mParser, this); -} - - -/** - User code (typically the startElement handler of user parsers derived from expatppNesting) - may call - switchToNewSubParser( new UserChildParser() ); - to hand off the current document to a child parser that understands the next segment of XML. - Control will be returned to the original (parent) parser when the end of the child element - is reached. - In its lifetime a 'parent' parser may switch control to several child parsers (one at a time - of course) as it moves through the document encoutering various types of child element. - - A child to which older code (eg: OOFILE) has just switched control by - new childParser(this) will be self-deleting and will clear our mOwnedChild in its dtor. -*/ -void expatppNesting::switchToNewSubParser( expatppNesting* pAdoptedChild ) -{ - assert(pAdoptedChild); - AdoptChild(pAdoptedChild); - pAdoptedChild->BeAdopted(this); -} - - -/** - If this is root parser, nestedEndElementCallback won't call returnToParent. - Therefore it is safe to put parsers on the stack. -*/ -expatppNesting* -expatppNesting::returnToParent() -{ - expatppNesting* ret = mParent; - ::XML_SetUserData(mParser, mParent); - mParent=0; - mParser=0; // prevent parser shutdown by expatpp::~expatpp!! 
- if (mSelfDeleting) { - ret->OwnedChildOrphansItself(this); - delete this; // MUST BE LAST THING CALLED IN NON-VIRTUAL FUNCTION, NO MEMBER ACCESS - } - return ret; -} - - -void -expatppNesting::nestedStartElementCallback(void *userData, const XML_Char* name, const XML_Char** atts) -{ - assert(userData); - expatppNesting* nestedParser = (expatppNesting*)userData; - nestedParser->mDepth++; - nestedParser->startElement(name, atts); // probably user override -} - - -/** - If this is root parser, will never hit nestedEndElementCallback after closing element, - except for when we call it. - \param userData should be non-nil except for specific case of ending root -*/ -void -expatppNesting::nestedEndElementCallback(void *userData, const XML_Char* name) -{ - if (!userData) - return; // end tag for root - - expatppNesting* nestedParser = (expatppNesting*)userData; -// we don't know until we hit a closing tag 'outside' us that our run is done - if (nestedParser->mDepth==0) { - expatppNesting* parentParser = nestedParser->returnToParent(); - nestedEndElementCallback(parentParser, name); // callbacks for expatppNesting stay registered, so safe - //if we don't invoke their callback, they will not balance their mDepth - } - else { - // end of an element this parser has started - normal case - nestedParser->endElement(name); // probably user override - nestedParser->mDepth--; - } -} - - -/** - Called by switchToNewSubParser to indicate a newly created child parser - is now the currently active child for adoptingParent and the child - isn't expected to be self deleting. 
- - Normal code to create an owned child would be either - switchToNewSubParser( new UserChildParser(this) ); - where this is the currently active parser and you want to be deleting it, or - new UserChildParser(this); - to have a child parser self-delete - - \par Important Safety Note - Copes with the situation of people forgetting to pass - in the parent parser (and hence creating a new one by default) - if invoked by switchToNewSubParser( new UserChildParser() ) - by somewhat wastefully deleting the parser created in expatpp::expatpp - by us being a root parser. -*/ -void -expatppNesting::BeAdopted(expatppNesting* adoptingParent) -{ - if (mParent) { - assert(mParent==adoptingParent); - } - else { // root parser being adopted, cleanup! - ReleaseParser(); - mParent = adoptingParent; - RegisterWithParentXMLParser(); - } - mSelfDeleting = false; -} - - - - diff --git a/expatpp.h b/expatpp.h deleted file mode 100755 index 098c69d..0000000 --- a/expatpp.h +++ /dev/null @@ -1,339 +0,0 @@ -// expatpp -#ifndef H_EXPATPP -#define H_EXPATPP - -#ifdef EXPATPP_COMPATIBLE_EXPAT12 // earlier versions of expat up to v1.2 - #include "xmlparse.h" -#else - #include "expat.h" // since some version of expat moved to SourceForge -#endif -#include <stdio.h> -#include <assert.h> - - -/** -\file expatpp.h -Latest version 29-Dec-2002 compatible with expat 1.95.6 -*/ - -/** -expatpp follows a simple pattern for converting the semi-OOP callback design of -expat into a true class which allows you to override virtual methods to supply -callbacks. 
- -\par USING expatpp -see testexpatpp.cpp for a detailed example - -1) decide which callbacks you wish to use, eg: just startElement - -2) declare a subclass of expatpp, eg: -class myExpat : public expatpp { - virtual void startElement(const XML_Char* name, const XML_Char** atts); -}; - -3) create an instance of your object and pass in a buffer to parse -myExpat parser; -parser.XML_Parse(buf, len, done) - - -\par HOW IT WORKS -The User Data which expat maintains is simply a pointer to an instance of your object. - -Inline static functions are specified as the callbacks to expat. -These static functions take the user data parameter returned from expat and cast it -to a pointer to an expatpp object. - -Using that typed pointer they then call the appropriate virtual method. - -If you have overriden a given virtual method then your version will be called, otherwise -the (empty) method in the base expatpp class is called. - -\par Possible Efficiency Tactic -For efficiency, you could provide your own constructor and set some of the callbacks -to 0, so expat doesn't call the static functions. (untested idea). - -\par Naming Conventions -The virtual functions violate the usual AD Software convention of lowercase first letter -for public methods but this was a late change to protected and too much user code out there. - - -\todo Possibly implement some handling for XML_SetExternalEntityRefHandler which does NOT -receive user data, just the parser, so can't use normal pattern for invoking virtual methods - -\todo Possibly implement handling for XML_UnknownEncodingHandler. - -\todo review design for nested calls - not happy that it is the right thing that they don't see -their start and ending elements - makes it harder to unit test them in isolation. 
- -\todo unit tests - -\todo especially test abort mechanism - -\todo reinstate copy constrution and assignment with child parser cleanup - -\todo allow specification of encoding -*/ -class expatpp { -public: - expatpp(bool createParser=true); - virtual ~expatpp(); - - operator XML_Parser() const; - -protected: // callback virtuals should only be invoked through our Callback static functions - bool emptyCharData(const XML_Char* s, int len); // utility often used in overridden charData - -// overrideable callbacks - virtual void startElement(const XML_Char* name, const XML_Char** atts); - virtual void endElement(const XML_Char*); - virtual void charData(const XML_Char*, int len); - virtual void processingInstruction(const XML_Char* target, const XML_Char* data); - virtual void defaultHandler(const XML_Char*, int len); - virtual int notStandaloneHandler(); - virtual void unparsedEntityDecl(const XML_Char* entityName, const XML_Char* base, const XML_Char* systemId, const XML_Char* publicId, const XML_Char* notationName); - virtual void notationDecl(const XML_Char* notationName, const XML_Char* base, const XML_Char* systemId, const XML_Char* publicId); - virtual void startNamespace(const XML_Char* prefix, const XML_Char* uri); - virtual void endNamespace(const XML_Char*); -/// \name Callbacks added to support expat 1.95.5 -//@{ - virtual void attlistDecl( - const XML_Char *elname, - const XML_Char *attname, - const XML_Char *att_type, - const XML_Char *dflt, - int isrequired); - virtual void endCdataSection(); - virtual void endDoctypeDecl(); - virtual void comment( const XML_Char *data); - virtual void elementDecl( const XML_Char *name, XML_Content *model); - virtual void entityDecl( - const XML_Char *entityName, - int is_parameter_entity, - const XML_Char *value, - int value_length, - const XML_Char *base, - const XML_Char *systemId, - const XML_Char *publicId, - const XML_Char *notationName); - virtual void skippedEntity(const XML_Char *entityName, int 
is_parameter_entity); - virtual void startCdataSection(); - virtual void startDoctypeDecl(const XML_Char *doctypeName, - const XML_Char *sysid, - const XML_Char *pubid, - int has_internal_subset); - virtual void xmlDecl( const XML_Char *version, - const XML_Char *encoding, - int standalone); -//@} - -public: -/// \name XML interfaces -//@{ - XML_Status XML_Parse(const char* buffer, int len, int isFinal); - virtual XML_Status parseFile(FILE* inFile); - virtual XML_Status parseString(const char*); - XML_Error XML_GetErrorCode(); - int XML_GetCurrentLineNumber(); - int XML_GetCurrentColumnNumber(); -//@} - -protected: - XML_Parser mParser; - bool mHaveParsed; - -/// \name overrideables to customise behaviour, must call parent -//@{ - virtual void ReleaseParser(); - virtual void ResetParser(); - virtual void SetupHandlers(); -//@} - -/** - Override so subclass can react to an error causing exit from parse. - rather than leave it for application code to check status. - Useful point to insert logging to silently grab failed parses -*/ - virtual void CheckFinalStatus(XML_Status) {}; - -// static interface functions for callbacks -public: - static void startElementCallback(void *userData, const XML_Char* name, const XML_Char** atts); - static void endElementCallback(void *userData, const XML_Char* name); - static void startNamespaceCallback(void *userData, const XML_Char* prefix, const XML_Char* uri); - static void endNamespaceCallback(void *userData, const XML_Char* prefix); - static void charDataCallback(void *userData, const XML_Char* s, int len); - static void processingInstructionCallback(void *userData, const XML_Char* target, const XML_Char* data); - static void defaultHandlerCallback(void* userData, const XML_Char* s, int len); - static int notStandaloneHandlerCallback(void* userData); - static void unParsedEntityDeclCallback(void* userData, const XML_Char* entityName, const XML_Char* base, const XML_Char* systemId, const XML_Char* publicId, const XML_Char* 
notationName); - static void notationDeclCallback(void *userData, const XML_Char* notationName, const XML_Char* base, const XML_Char* systemId, const XML_Char* publicId); -/// \name Callback interfacess added to support expat 1.95.5 -//@{ - static void attlistDeclCallback(void *userData, - const XML_Char *elname, - const XML_Char *attname, - const XML_Char *att_type, - const XML_Char *dflt, - int isrequired); - static void commentCallback(void *userData, const XML_Char *data); - static void elementDeclCallback(void *userData, const XML_Char *name, XML_Content *model); - static void endCdataSectionCallback(void *userData); - static void endDoctypeDeclCallback(void *userData); - static void entityDeclCallback(void *userData, - const XML_Char *entityName, - int is_parameter_entity, - const XML_Char *value, - int value_length, - const XML_Char *base, - const XML_Char *systemId, - const XML_Char *publicId, - const XML_Char *notationName); - static void skippedEntityCallback(void *userData, const XML_Char *entityName, int is_parameter_entity); - static void startCdataSectionCallback(void *userData); - static void startDoctypeDeclCallback(void *userData, - const XML_Char *doctypeName, - const XML_Char *sysid, - const XML_Char *pubid, - int has_internal_subset); - static void xmlDeclCallback(void *userData, const XML_Char *version, - const XML_Char *encoding, - int standalone); -//@} - - -// utilities - static int skipWhiteSpace(const XML_Char*); - static const XML_Char* getAttribute(const XML_Char *matchingName, const XML_Char **atts); - static bool getIntegerAttribute(const XML_Char *matchingName, const XML_Char **atts, int& outAtt); - static bool getDoubleAttribute(const XML_Char *matchingName, const XML_Char **atts, double& outAtt); -}; - - -/** - subclass to support a hierarchy of parsers, in a sort of recursion or - 'nesting' approach, where a top-level parser might create sub-parsers - for part of a file. 
- - The currently active child parser is owned (mOwnedChild) and is deleted - by DeleteChild (invoked from the dtor) so error handling can propagate - up the tree, closing parsers, without leaks. - - \par Switching to sub-parsers - You can transfer to a sub-parser with - - new UserChildParser(this) // carries on using our parser, is self-deleting - - switchToNewSubParser( someVar = new UserChildParser(this) ) // if want to get values back after end parsing - - \warning You can accidentally invoke a new parser without it doing anything - - new UserChildParser() // will be new top-level parser, nothing to do with our XML - - \par Self-deletion - If you transfer control to a sub-parser with just new UserChildParser(this) then - it will be automatically self-deleting in its returnToParent method and - will invoke OwnedChildOrphansItself to clear our mOwnedChild. - - The reason for self-deletion being governed by a somewhat complex chain of - calls rather than simply a boolean flag is because expatpp has been in use - worldwide for many years and it was deemed too unfriendly to break code in - a manner which could cause unwanted side effects - the current approach safely - preserves self-deletion but also allows for expatpp to have parent parsers - own and delete children, without compiling with different options. - - \note - If you invoke a sub-parser with switchToNewSubParser( new UserChildParser() ); - then the user child parser will start with a new XML parser instance - created by the expatpp ctor. This is safe but slightly wasteful of processing - as the new parser will be discarded by BeAdopted(). 
- - \par Switching to child and explicitly deleting - switchToNewSubParser( somevar = new UserChildParser(this) ) allows you to get values - back out of the child parser, in the context of the parent, eg: - -\verbatim - -void MultiFilterParser::startElement(const XML_Char* name, const XML_Char **atts) -{ - if(strcmp(name,"FilterRequest")==0) { - switchToNewSubParser( - mCurrentFilterParser = new FilterRequestParser(this, atts) - ); // we own and will have to explicitly delete -... -} - -void MultiFilterParser::endElement(const XML_Char *name) -{ - if(strcmp(name,"FilterRequest")==0) { - assert(mCurrentFilterParser); - FilterClause* newClause = mCurrentFilterParser->orphanBuiltClause(); // retrieve data built by sub-parser -... - mCurrentFilterParser = 0; - DeleteChild(); - } -} -\endverbatim -*/ -class expatppNesting : public expatpp { - -public: - expatppNesting(expatppNesting* parent=0); ///< NOT a copy ctor!! this is a recursive situation - virtual ~expatppNesting(); - - void switchToNewSubParser( expatppNesting* pAdoptedChild ); - expatppNesting* returnToParent(); - -protected: - void BeAdopted(expatppNesting* adoptingParent); - void OwnedChildOrphansItself(expatppNesting* callingChild); - void RegisterWithParentXMLParser(); - virtual void AdoptChild(expatppNesting* adoptingChild); - virtual void DeleteChild(); - - int mDepth; - bool mSelfDeleting; ///< only valid if mParent not null - expatppNesting* mParent; ///< may be null the parent owns this object - expatppNesting* mOwnedChild; ///< owned, optional currently active child (auto_ptr not used to avoid STL dependency) - -public: -/// \name interface functions for callbacks -//@{ - static void nestedStartElementCallback(void* userData, const XML_Char* name, const XML_Char** atts); - static void nestedEndElementCallback(void* userData, const XML_Char* name); -//@} - - -/// \name overrideables to customise behaviour, must call parent -//@{ - virtual void SetupHandlers(); -//@} - -private: - // Forbid 
copy-construction and assignment, to prevent double-deletion of mOwnedChild - expatppNesting( const expatppNesting & ); - expatppNesting & operator=( const expatppNesting & ); -}; - - -// inlines - -// ------------------------------------------------------- -// e x p a t p p -// ------------------------------------------------------- -inline -expatpp::operator XML_Parser() const -{ - return mParser; -} - - -// ------------------------------------------------------- -// e x p a t p p N e s t i n g -// ------------------------------------------------------- -inline void -expatppNesting::OwnedChildOrphansItself(expatppNesting* callingChild) -{ - assert(callingChild==mOwnedChild); - mOwnedChild = 0; -} - - - -#endif // H_EXPATPP diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt new file mode 100644 index 0000000..ed70f58 --- /dev/null +++ b/lib/CMakeLists.txt @@ -0,0 +1,3 @@ + +add_library(expatpp SHARED expatpp.cpp) +target_link_libraries(expatpp ${EXPAT_LIBRARIES}) \ No newline at end of file diff --git a/lib/expatpp.cpp b/lib/expatpp.cpp new file mode 100755 index 0000000..8a7c532 --- /dev/null +++ b/lib/expatpp.cpp @@ -0,0 +1,799 @@ +// expatpp +#ifdef UNDER_CE + #include <string.h> + #include <windows.h> + #include <dbgapi.h> + #define assert ASSERT +#else + #include <string> + using namespace std; + #include <assert.h> +#endif +#include "expatpp.h" + + +// may be defined in xmltchar.h or elsewhere +#ifndef tcscmp + #ifdef XML_UNICODE + #define tcscmp wcscmp + #else + #define tcscmp strcmp + #endif // XML_UNICODE +#endif // tcscmp + + +#ifndef BUFSIZ + #define BUFSIZ 4096 +#endif + +#include <stdio.h> +#include <string.h> + +expatpp::expatpp(bool createParser) : + mParser(0), // in case of exception below + mHaveParsed(false) +{ + if (createParser) { + // subclasses may call this ctor after parser created! 
+ mParser = XML_ParserCreate(0); + SetupHandlers(); + } +} + + +void +expatpp::SetupHandlers() +{ + ::XML_SetUserData(mParser, this); + ::XML_SetElementHandler(mParser, startElementCallback, endElementCallback); + ::XML_SetCharacterDataHandler(mParser, charDataCallback); + ::XML_SetProcessingInstructionHandler(mParser, processingInstructionCallback); + ::XML_SetDefaultHandler(mParser, defaultHandlerCallback); + ::XML_SetUnparsedEntityDeclHandler(mParser, unParsedEntityDeclCallback); + ::XML_SetNotationDeclHandler(mParser, notationDeclCallback); + ::XML_SetNotStandaloneHandler(mParser, notStandaloneHandlerCallback); + ::XML_SetNamespaceDeclHandler(mParser, startNamespaceCallback, endNamespaceCallback); +#ifndef EXPATPP_COMPATIBLE_EXPAT12 + ::XML_SetAttlistDeclHandler(mParser, attlistDeclCallback); + ::XML_SetCdataSectionHandler(mParser, startCdataSectionCallback, endCdataSectionCallback); + ::XML_SetCommentHandler(mParser, commentCallback); + ::XML_SetDoctypeDeclHandler(mParser, startDoctypeDeclCallback, endDoctypeDeclCallback); + ::XML_SetElementDeclHandler(mParser, elementDeclCallback); + ::XML_SetEntityDeclHandler(mParser, entityDeclCallback); + ::XML_SetSkippedEntityHandler(mParser, skippedEntityCallback); + ::XML_SetXmlDeclHandler(mParser, xmlDeclCallback); +#endif +} + + +expatpp::~expatpp() +{ + if (mParser) // allows subclasses to avoid finishing parsing + ReleaseParser(); +} + + +/** + Provide single point that will call XML_ParserFree. + Nothing else in this code should call XML_ParserFree! +*/ +void +expatpp::ReleaseParser() +{ + ::XML_ParserFree(mParser); + mParser = 0; +} + + + +/** + Provide single point that will call XML_ParserReset. + Guarded against trivial reset before use in case that breaks + expat or creates overhead. 
+ + \todo pass in encoding to XML_ParserReset when we support encodings +*/ +void +expatpp::ResetParser() +{ +#ifdef EXPATPP_COMPATIBLE_EXPAT12 + assert(!"Reset not available in earlier than expat 1.95.3"); // debug-only failure: this build has no XML_ParserReset path +#else + if (mHaveParsed) { + ::XML_ParserReset(mParser, NULL); + SetupHandlers(); + mHaveParsed = false; + } +#endif +} + + +/** + Parse entire file, basically copy of the loop from the elements.c example. +*/ +XML_Status +expatpp::parseFile(FILE* inFile) +{ + ResetParser(); + + char buf[BUFSIZ]; + int done; + if (!inFile) + return XML_STATUS_ERROR; + fseek(inFile, 0, SEEK_SET); // reset for reading + do { + size_t len = fread(buf, 1, sizeof(buf), inFile); + done = len < sizeof(buf); // short read: pass this chunk to the parser as the final one + enum XML_Status parseStatus; + if ((parseStatus = XML_Parse(buf, len, done))!=XML_STATUS_OK) { + return parseStatus; + } + } while (!done); + return XML_STATUS_OK; +} + + +XML_Status +expatpp::XML_Parse(const char *s, int len, int isFinal) +{ + mHaveParsed = true; // ResetParser() only resets once this is set + const XML_Status retStatus = ::XML_Parse(mParser, s, len, isFinal); + if (isFinal) + CheckFinalStatus(retStatus); + return retStatus; +} + + +XML_Error +expatpp::XML_GetErrorCode() +{ + return ::XML_GetErrorCode(mParser); +} + + +int +expatpp::XML_GetCurrentLineNumber() +{ + return ::XML_GetCurrentLineNumber(mParser); +} + + +int +expatpp::XML_GetCurrentColumnNumber() +{ + return ::XML_GetCurrentColumnNumber(mParser); +} + + + + +/** + Parse string which is assumed to be entire XML document. 
+ Written to stop stupid errors of being off by one in the string length causing + wasted debugging time, such as: +\verbatim + const char[] kSampleSettings = "<settings/>"; + const int sampleSize = sizeof(kSampleSettings)-1; // unless you remember to subtract one here will get invalid token error + if (!parser.XML_Parse(kSampleSettings, sampleSize, 1)) { +\endverbatim +*/ +XML_Status +expatpp::parseString(const char* inString) +{ + ResetParser(); + const int inLen = strlen(inString); + return XML_Parse(inString, inLen, 1); +} + +void +expatpp::startElementCallback(void *userData, const XML_Char* name, const XML_Char** atts) +{ + ((expatpp*)userData)->startElement(name, atts); +} + + +void +expatpp::endElementCallback(void *userData, const XML_Char* name) +{ + ((expatpp*)userData)->endElement(name); +} + + +void +expatpp::startNamespaceCallback(void *userData, const XML_Char* prefix, const XML_Char* uri) +{ + ((expatpp*)userData)->startNamespace(prefix, uri); +} + + +void +expatpp::endNamespaceCallback(void *userData, const XML_Char* prefix) +{ + ((expatpp*)userData)->endNamespace(prefix); +} + + +void +expatpp::charDataCallback(void *userData, const XML_Char* s, int len) +{ + ((expatpp*)userData)->charData(s, len); +} + + +void +expatpp:: processingInstructionCallback(void *userData, const XML_Char* target, const XML_Char* data) +{ + ((expatpp*)userData)->processingInstruction(target, data); +} + + +void +expatpp::defaultHandlerCallback(void* userData, const XML_Char* s, int len) +{ + ((expatpp*)userData)->defaultHandler(s, len); +} + + +int +expatpp::notStandaloneHandlerCallback(void* userData) +{ + return ((expatpp*)userData)->notStandaloneHandler(); +} + + +void +expatpp::unParsedEntityDeclCallback(void* userData, const XML_Char* entityName, const XML_Char* base, const XML_Char* systemId, const XML_Char* publicId, const XML_Char* notationName) +{ + ((expatpp*)userData)->unparsedEntityDecl(entityName, base, systemId, publicId, notationName); +} + + +void 
+expatpp::notationDeclCallback(void *userData, const XML_Char* notationName, const XML_Char* base, const XML_Char* systemId, const XML_Char* publicId) +{ + ((expatpp*)userData)->notationDecl(notationName, base, systemId, publicId); +} + + +void +expatpp::startElement(const XML_Char*, const XML_Char**) +{} + + +void +expatpp::endElement(const XML_Char*) +{} + + +void +expatpp::startNamespace(const XML_Char* /* prefix */, const XML_Char* /* uri */) +{} + + +void +expatpp::endNamespace(const XML_Char*) +{} + + +void +expatpp::charData(const XML_Char*, int ) +{ +} + + +void +expatpp::processingInstruction(const XML_Char*, const XML_Char*) +{ +} + + +void +expatpp::defaultHandler(const XML_Char*, int) +{ +} + + +int +expatpp::notStandaloneHandler() +{ + return 0; +} + + +void +expatpp::unparsedEntityDecl(const XML_Char*, const XML_Char*, const XML_Char*, const XML_Char*, const XML_Char*) +{ +} + + +void +expatpp::notationDecl(const XML_Char*, const XML_Char*, const XML_Char*, const XML_Char*) +{ +} + + +int +expatpp::skipWhiteSpace(const XML_Char* startFrom) +{ + // use our own XML definition of white space + // TO DO - confirm this is correct! + const XML_Char* s = startFrom; + XML_Char c = *s; + while ((c==' ') || (c=='\t') || (c=='\n') || (c=='\r')) { + s++; + c = *s; + } + const int numSkipped = s - startFrom; + return numSkipped; +} + + +/** + Iterate the paired attribute name/value until find a pair with matching name. + \return pointer to the value or null if not found. 
+*/ +const XML_Char* +expatpp::getAttribute(const XML_Char* matchingName, const XML_Char** atts) +{ + for (int i=0; atts[i] && atts[i+1]; i+=2) { // walk name/value pairs without stepping past the terminating null + const XML_Char* attributeName = atts[i]; + assert(attributeName); // shouldn't fail this because of loop test above + if(tcscmp(attributeName, matchingName)==0) { + return atts[i+1]; // value paired with the matching name + } + } + return 0; // not found, or a trailing name with no value +} + + +/** +\bug will always return 0 for PPC +*/ +bool +expatpp::getIntegerAttribute(const XML_Char *matchingName, const XML_Char **atts, int& outAtt) +{ + const XML_Char* attStr = getAttribute(matchingName, atts); + if (!attStr) + return false; + int i=0; +#ifdef XML_UNICODE +fail to compile because need this now +#else + sscanf(attStr, "%d", &i); +#endif + outAtt = i; + return true; +} + + +/** +\bug will always return 0 for PPC +*/ +bool +expatpp::getDoubleAttribute(const XML_Char *matchingName, const XML_Char **atts, double& outAtt) +{ + const XML_Char* attStr = getAttribute(matchingName, atts); + if (!attStr) + return false; + double d = 0.0; // read with "%lf" so the value is stored as a double, not narrowed through float +#ifdef XML_UNICODE +fail to compile because need this now +#else + sscanf(attStr, "%lf", &d); +#endif + outAtt = d; + return true; +} + + +bool +expatpp::emptyCharData(const XML_Char *s, int len) +{ +// usually call from top of overridden charData methods + if (len==0) + return true; //*** early exit - empty string, may never occur?? 
+ +// skip newline and empty whitespace + if ( + ((len==1) && ( (s[0]=='\n') || (s[0]=='\r')) ) || // just CR or just LF + ((len==2) && (s[0]=='\r') && (s[1]=='\n')) // DOS-style CRLF + ) + return true; //*** early exit - newline + + const int lastCharAt = len-1; + if (s[lastCharAt]==' ') { // maybe all whitespace + int i; + for (i=0; i<lastCharAt; i++) { + if (s[i]!=' ') + break; + } + if (i==lastCharAt) + return true; //*** early exit - all spaces + } + return false; +} + + +//-------- Added for expat 1.95.5--------------- +void +expatpp::attlistDeclCallback(void *userData, + const XML_Char *elname, + const XML_Char *attname, + const XML_Char *att_type, + const XML_Char *dflt, + int isrequired) +{ + ((expatpp*)userData)->attlistDecl(elname, attname, att_type, dflt, isrequired); +} + + +void +expatpp::commentCallback(void *userData, const XML_Char *data) +{ + ((expatpp*)userData)->comment(data); +} + + +void +expatpp::elementDeclCallback(void *userData, const XML_Char *name, XML_Content *model) +{ + ((expatpp*)userData)->elementDecl(name, model); +} + + +void +expatpp::endCdataSectionCallback(void *userData) +{ + ((expatpp*)userData)->endCdataSection(); +} + + +void +expatpp::endDoctypeDeclCallback(void *userData) +{ + ((expatpp*)userData)->endDoctypeDecl(); +} + + +void +expatpp::entityDeclCallback(void *userData, + const XML_Char *entityName, + int is_parameter_entity, + const XML_Char *value, + int value_length, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId, + const XML_Char *notationName) +{ + ((expatpp*)userData)->entityDecl(entityName, is_parameter_entity, value, value_length, base, systemId, publicId, notationName); +} + + +void +expatpp::skippedEntityCallback(void *userData, const XML_Char *entityName, int is_parameter_entity) +{ + ((expatpp*)userData)->skippedEntity(entityName, is_parameter_entity); +} + + +void +expatpp::startCdataSectionCallback(void *userData) +{ + ((expatpp*)userData)->startCdataSection(); +} + + +void 
+expatpp::startDoctypeDeclCallback(void *userData, + const XML_Char *doctypeName, + const XML_Char *sysid, + const XML_Char *pubid, + int has_internal_subset) +{ + ((expatpp*)userData)->startDoctypeDecl(doctypeName, sysid, pubid, has_internal_subset); +} + + +void +expatpp::xmlDeclCallback(void *userData, const XML_Char *version, + const XML_Char *encoding, + int standalone) +{ + ((expatpp*)userData)->xmlDecl(version, encoding, standalone); +} + + +void +expatpp::attlistDecl( + const XML_Char *elname, + const XML_Char *attname, + const XML_Char *att_type, + const XML_Char *dflt, + int isrequired) +{ +} + + +void +expatpp::comment( const XML_Char *data) +{ +} + + +void +expatpp::elementDecl( const XML_Char *name, XML_Content *model) +{ +} + + +void +expatpp::endCdataSection() +{ +} + + +void +expatpp::endDoctypeDecl() +{ +} + + +void +expatpp::entityDecl( + const XML_Char *entityName, + int is_parameter_entity, + const XML_Char *value, + int value_length, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId, + const XML_Char *notationName) +{ +} + + +void +expatpp::skippedEntity( const XML_Char *entityName, int is_parameter_entity) +{ +} + + +void +expatpp::startCdataSection() +{ +} + + +void +expatpp::startDoctypeDecl(const XML_Char *doctypeName, + const XML_Char *sysid, + const XML_Char *pubid, + int has_internal_subset) +{ +} + + +void +expatpp::xmlDecl( const XML_Char *version, + const XML_Char *encoding, + int standalone) +{ +} + + + + +// ------------------------------------------------------- +// e x p a t p p N e s t i n g +// ------------------------------------------------------- +/** + \param parent can be null in which case this is root parser + + \note The handlers set in here MUST be also set in SetupHandlers + which is a virtual method invoked by expatpp::ResetParser. 
Otherwise + you can have subtle bugs with a nested parser not properly returning + after reusing a parser (nasty and found rapidly only via extensive unit + tests and plentiful assertions!). + + \WARNING + The assumption that is not obvious here is that if you want to use + nested parsers, then your topmost parser must also be an expatppNesting + subclass, NOT an expatpp subclass, because we need the + nestedStartElementCallback and nestedEndElementCallback + callbacks to override those in the expatpp ctor. + + + + \todo go back over code in detail and confirm above warning still valid + I think if we used expat's functions to invoke the registered callback + might be safer - the explicit function call we have in nestedEndElementCallback + certainly assumes the parent type. +*/ +expatppNesting::expatppNesting(expatppNesting* parent) : + expatpp(parent==0), // don't create parser - we're taking over from parent if given + mDepth(0), + mSelfDeleting(true), // initialisers are listed in member declaration order, the order they actually run + mParent(parent), + mOwnedChild(0) +{ + if ( parent ) + { + RegisterWithParentXMLParser(); + parent->AdoptChild(this); + } + else + { + // No parent - the expatpp constructor will have created a new mParser (expat parser) + ::XML_SetElementHandler(mParser, nestedStartElementCallback, nestedEndElementCallback); + } + assert(mParser); // either we created above or expatpp +} + + +expatppNesting::~expatppNesting() +{ + assert(!mParent); // if we are a sub-parser, should not delete without calling returnToParent + DeleteChild(); +} + + +/** + Call parent version then override same as in our ctor. +*/ +void +expatppNesting::SetupHandlers() +{ + expatpp::SetupHandlers(); + ::XML_SetElementHandler(mParser, nestedStartElementCallback, nestedEndElementCallback); +} + +/** + Must use if you have adopted a child parser and want to dispose of it early. +*/ +void +expatppNesting::DeleteChild() +{ + delete mOwnedChild; + mOwnedChild = 0; +} + + +/** + Invoked as a callback from a child ctor when we pass in a parent pointer. 
+ OR used from switchToNewSubParser, in which case it may be the 2nd time + we're called for a given child (see scenarios in expatppNesting class comment). +*/ +void +expatppNesting::AdoptChild(expatppNesting* adoptingChild) +{ + if ( mOwnedChild != adoptingChild ) + { + delete mOwnedChild; + mOwnedChild = adoptingChild; + } +} + + +/** + to use parent's underlying expat parser +*/ +void +expatppNesting::RegisterWithParentXMLParser() +{ + mParser = mParent->mParser; + ::XML_SetUserData(mParser, this); +} + + +/** + User code (typically the startElement handler of user parsers derived from expatppNesting) + may call + switchToNewSubParser( new UserChildParser() ); + to hand off the current document to a child parser that understands the next segment of XML. + Control will be returned to the original (parent) parser when the end of the child element + is reached. + In its lifetime a 'parent' parser may switch control to several child parsers (one at a time + of course) as it moves through the document encoutering various types of child element. + + A child to which older code (eg: OOFILE) has just switched control by + new childParser(this) will be self-deleting and will clear our mOwnedChild in its dtor. +*/ +void expatppNesting::switchToNewSubParser( expatppNesting* pAdoptedChild ) +{ + assert(pAdoptedChild); + AdoptChild(pAdoptedChild); + pAdoptedChild->BeAdopted(this); +} + + +/** + If this is root parser, nestedEndElementCallback won't call returnToParent. + Therefore it is safe to put parsers on the stack. +*/ +expatppNesting* +expatppNesting::returnToParent() +{ + expatppNesting* ret = mParent; + ::XML_SetUserData(mParser, mParent); + mParent=0; + mParser=0; // prevent parser shutdown by expatpp::~expatpp!! 
+ if (mSelfDeleting) { + ret->OwnedChildOrphansItself(this); + delete this; // MUST BE LAST THING CALLED IN NON-VIRTUAL FUNCTION, NO MEMBER ACCESS + } + return ret; +} + + +void +expatppNesting::nestedStartElementCallback(void *userData, const XML_Char* name, const XML_Char** atts) +{ + assert(userData); + expatppNesting* nestedParser = (expatppNesting*)userData; + nestedParser->mDepth++; + nestedParser->startElement(name, atts); // probably user override +} + + +/** + If this is root parser, will never hit nestedEndElementCallback after closing element, + except for when we call it. + \param userData should be non-nil except for specific case of ending root +*/ +void +expatppNesting::nestedEndElementCallback(void *userData, const XML_Char* name) +{ + if (!userData) + return; // end tag for root + + expatppNesting* nestedParser = (expatppNesting*)userData; +// we don't know until we hit a closing tag 'outside' us that our run is done + if (nestedParser->mDepth==0) { + expatppNesting* parentParser = nestedParser->returnToParent(); + nestedEndElementCallback(parentParser, name); // callbacks for expatppNesting stay registered, so safe + //if we don't invoke their callback, they will not balance their mDepth + } + else { + // end of an element this parser has started - normal case + nestedParser->endElement(name); // probably user override + nestedParser->mDepth--; + } +} + + +/** + Called by switchToNewSubParser to indicate a newly created child parser + is now the currently active child for adoptingParent and the child + isn't expected to be self deleting. 
+ + Normal code to create an owned child would be either + switchToNewSubParser( new UserChildParser(this) ); + where this is the currently active parser and you want to be deleting it, or + new UserChildParser(this); + to have a child parser self-delete + + \par Important Safety Note + Copes with the situation of people forgetting to pass + in the parent parser (and hence creating a new one by default) + if invoked by switchToNewSubParser( new UserChildParser() ) + by somewhat wastefully deleting the parser created in expatpp::expatpp + by us being a root parser. +*/ +void +expatppNesting::BeAdopted(expatppNesting* adoptingParent) +{ + if (mParent) { + assert(mParent==adoptingParent); + } + else { // root parser being adopted, cleanup! + ReleaseParser(); + mParent = adoptingParent; + RegisterWithParentXMLParser(); + } + mSelfDeleting = false; +} + + + + diff --git a/lib/expatpp.h b/lib/expatpp.h new file mode 100755 index 0000000..098c69d --- /dev/null +++ b/lib/expatpp.h @@ -0,0 +1,339 @@ +// expatpp +#ifndef H_EXPATPP +#define H_EXPATPP + +#ifdef EXPATPP_COMPATIBLE_EXPAT12 // earlier versions of expat up to v1.2 + #include "xmlparse.h" +#else + #include "expat.h" // since some version of expat moved to SourceForge +#endif +#include <stdio.h> +#include <assert.h> + + +/** +\file expatpp.h +Latest version 29-Dec-2002 compatible with expat 1.95.6 +*/ + +/** +expatpp follows a simple pattern for converting the semi-OOP callback design of +expat into a true class which allows you to override virtual methods to supply +callbacks. 
+ +\par USING expatpp +see testexpatpp.cpp for a detailed example + +1) decide which callbacks you wish to use, eg: just startElement + +2) declare a subclass of expatpp, eg: +class myExpat : public expatpp { + virtual void startElement(const XML_Char* name, const XML_Char** atts); +}; + +3) create an instance of your object and pass in a buffer to parse +myExpat parser; +parser.XML_Parse(buf, len, done) + + +\par HOW IT WORKS +The User Data which expat maintains is simply a pointer to an instance of your object. + +Inline static functions are specified as the callbacks to expat. +These static functions take the user data parameter returned from expat and cast it +to a pointer to an expatpp object. + +Using that typed pointer they then call the appropriate virtual method. + +If you have overriden a given virtual method then your version will be called, otherwise +the (empty) method in the base expatpp class is called. + +\par Possible Efficiency Tactic +For efficiency, you could provide your own constructor and set some of the callbacks +to 0, so expat doesn't call the static functions. (untested idea). + +\par Naming Conventions +The virtual functions violate the usual AD Software convention of lowercase first letter +for public methods but this was a late change to protected and too much user code out there. + + +\todo Possibly implement some handling for XML_SetExternalEntityRefHandler which does NOT +receive user data, just the parser, so can't use normal pattern for invoking virtual methods + +\todo Possibly implement handling for XML_UnknownEncodingHandler. + +\todo review design for nested calls - not happy that it is the right thing that they don't see +their start and ending elements - makes it harder to unit test them in isolation. 
+ +\todo unit tests + +\todo especially test abort mechanism + +\todo reinstate copy constrution and assignment with child parser cleanup + +\todo allow specification of encoding +*/ +class expatpp { +public: + expatpp(bool createParser=true); + virtual ~expatpp(); + + operator XML_Parser() const; + +protected: // callback virtuals should only be invoked through our Callback static functions + bool emptyCharData(const XML_Char* s, int len); // utility often used in overridden charData + +// overrideable callbacks + virtual void startElement(const XML_Char* name, const XML_Char** atts); + virtual void endElement(const XML_Char*); + virtual void charData(const XML_Char*, int len); + virtual void processingInstruction(const XML_Char* target, const XML_Char* data); + virtual void defaultHandler(const XML_Char*, int len); + virtual int notStandaloneHandler(); + virtual void unparsedEntityDecl(const XML_Char* entityName, const XML_Char* base, const XML_Char* systemId, const XML_Char* publicId, const XML_Char* notationName); + virtual void notationDecl(const XML_Char* notationName, const XML_Char* base, const XML_Char* systemId, const XML_Char* publicId); + virtual void startNamespace(const XML_Char* prefix, const XML_Char* uri); + virtual void endNamespace(const XML_Char*); +/// \name Callbacks added to support expat 1.95.5 +//@{ + virtual void attlistDecl( + const XML_Char *elname, + const XML_Char *attname, + const XML_Char *att_type, + const XML_Char *dflt, + int isrequired); + virtual void endCdataSection(); + virtual void endDoctypeDecl(); + virtual void comment( const XML_Char *data); + virtual void elementDecl( const XML_Char *name, XML_Content *model); + virtual void entityDecl( + const XML_Char *entityName, + int is_parameter_entity, + const XML_Char *value, + int value_length, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId, + const XML_Char *notationName); + virtual void skippedEntity(const XML_Char *entityName, int 
is_parameter_entity); + virtual void startCdataSection(); + virtual void startDoctypeDecl(const XML_Char *doctypeName, + const XML_Char *sysid, + const XML_Char *pubid, + int has_internal_subset); + virtual void xmlDecl( const XML_Char *version, + const XML_Char *encoding, + int standalone); +//@} + +public: +/// \name XML interfaces +//@{ + XML_Status XML_Parse(const char* buffer, int len, int isFinal); + virtual XML_Status parseFile(FILE* inFile); + virtual XML_Status parseString(const char*); + XML_Error XML_GetErrorCode(); + int XML_GetCurrentLineNumber(); + int XML_GetCurrentColumnNumber(); +//@} + +protected: + XML_Parser mParser; + bool mHaveParsed; + +/// \name overrideables to customise behaviour, must call parent +//@{ + virtual void ReleaseParser(); + virtual void ResetParser(); + virtual void SetupHandlers(); +//@} + +/** + Override so subclass can react to an error causing exit from parse. + rather than leave it for application code to check status. + Useful point to insert logging to silently grab failed parses +*/ + virtual void CheckFinalStatus(XML_Status) {}; + +// static interface functions for callbacks +public: + static void startElementCallback(void *userData, const XML_Char* name, const XML_Char** atts); + static void endElementCallback(void *userData, const XML_Char* name); + static void startNamespaceCallback(void *userData, const XML_Char* prefix, const XML_Char* uri); + static void endNamespaceCallback(void *userData, const XML_Char* prefix); + static void charDataCallback(void *userData, const XML_Char* s, int len); + static void processingInstructionCallback(void *userData, const XML_Char* target, const XML_Char* data); + static void defaultHandlerCallback(void* userData, const XML_Char* s, int len); + static int notStandaloneHandlerCallback(void* userData); + static void unParsedEntityDeclCallback(void* userData, const XML_Char* entityName, const XML_Char* base, const XML_Char* systemId, const XML_Char* publicId, const XML_Char* 
notationName); + static void notationDeclCallback(void *userData, const XML_Char* notationName, const XML_Char* base, const XML_Char* systemId, const XML_Char* publicId); +/// \name Callback interfacess added to support expat 1.95.5 +//@{ + static void attlistDeclCallback(void *userData, + const XML_Char *elname, + const XML_Char *attname, + const XML_Char *att_type, + const XML_Char *dflt, + int isrequired); + static void commentCallback(void *userData, const XML_Char *data); + static void elementDeclCallback(void *userData, const XML_Char *name, XML_Content *model); + static void endCdataSectionCallback(void *userData); + static void endDoctypeDeclCallback(void *userData); + static void entityDeclCallback(void *userData, + const XML_Char *entityName, + int is_parameter_entity, + const XML_Char *value, + int value_length, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId, + const XML_Char *notationName); + static void skippedEntityCallback(void *userData, const XML_Char *entityName, int is_parameter_entity); + static void startCdataSectionCallback(void *userData); + static void startDoctypeDeclCallback(void *userData, + const XML_Char *doctypeName, + const XML_Char *sysid, + const XML_Char *pubid, + int has_internal_subset); + static void xmlDeclCallback(void *userData, const XML_Char *version, + const XML_Char *encoding, + int standalone); +//@} + + +// utilities + static int skipWhiteSpace(const XML_Char*); + static const XML_Char* getAttribute(const XML_Char *matchingName, const XML_Char **atts); + static bool getIntegerAttribute(const XML_Char *matchingName, const XML_Char **atts, int& outAtt); + static bool getDoubleAttribute(const XML_Char *matchingName, const XML_Char **atts, double& outAtt); +}; + + +/** + subclass to support a hierarchy of parsers, in a sort of recursion or + 'nesting' approach, where a top-level parser might create sub-parsers + for part of a file. 
+ + The currently active child parser is owned (mOwnedChild) and is deleted + by DeleteChild (invoked from the dtor) so error handling can propagate + up the tree, closing parsers, without leaks. + + \par Switching to sub-parsers + You can transfer to a sub-parser with + - new UserChildParser(this) // carries on using our parser, is self-deleting + - switchToNewSubParser( someVar = new UserChildParser(this) ) // if want to get values back after end parsing + + \warning You can accidentally invoke a new parser without it doing anything + - new UserChildParser() // will be new top-level parser, nothing to do with our XML + + \par Self-deletion + If you transfer control to a sub-parser with just new UserChildParser(this) then + it will be automatically self-deleting in its returnToParent method and + will invoke OwnedChildOrphansItself to clear our mOwnedChild. + + The reason for self-deletion being governed by a somewhat complex chain of + calls rather than simply a boolean flag is because expatpp has been in use + worldwide for many years and it was deemed too unfriendly to break code in + a manner which could cause unwanted side effects - the current approach safely + preserves self-deletion but also allows for expatpp to have parent parsers + own and delete children, without compiling with different options. + + \note + If you invoke a sub-parser with switchToNewSubParser( new UserChildParser() ); + then the user child parser will start with a new XML parser instance + created by the expatpp ctor. This is safe but slightly wasteful of processing + as the new parser will be discarded by BeAdopted(). 
+ + \par Switching to child and explicitly deleting + switchToNewSubParser( somevar = new UserChildParser(this) ) allows you to get values + back out of the child parser, in the context of the parent, eg: + +\verbatim + +void MultiFilterParser::startElement(const XML_Char* name, const XML_Char **atts) +{ + if(strcmp(name,"FilterRequest")==0) { + switchToNewSubParser( + mCurrentFilterParser = new FilterRequestParser(this, atts) + ); // we own and will have to explicitly delete +... +} + +void MultiFilterParser::endElement(const XML_Char *name) +{ + if(strcmp(name,"FilterRequest")==0) { + assert(mCurrentFilterParser); + FilterClause* newClause = mCurrentFilterParser->orphanBuiltClause(); // retrieve data built by sub-parser +... + mCurrentFilterParser = 0; + DeleteChild(); + } +} +\endverbatim +*/ +class expatppNesting : public expatpp { + +public: + expatppNesting(expatppNesting* parent=0); ///< NOT a copy ctor!! this is a recursive situation + virtual ~expatppNesting(); + + void switchToNewSubParser( expatppNesting* pAdoptedChild ); + expatppNesting* returnToParent(); + +protected: + void BeAdopted(expatppNesting* adoptingParent); + void OwnedChildOrphansItself(expatppNesting* callingChild); + void RegisterWithParentXMLParser(); + virtual void AdoptChild(expatppNesting* adoptingChild); + virtual void DeleteChild(); + + int mDepth; + bool mSelfDeleting; ///< only valid if mParent not null + expatppNesting* mParent; ///< may be null the parent owns this object + expatppNesting* mOwnedChild; ///< owned, optional currently active child (auto_ptr not used to avoid STL dependency) + +public: +/// \name interface functions for callbacks +//@{ + static void nestedStartElementCallback(void* userData, const XML_Char* name, const XML_Char** atts); + static void nestedEndElementCallback(void* userData, const XML_Char* name); +//@} + + +/// \name overrideables to customise behaviour, must call parent +//@{ + virtual void SetupHandlers(); +//@} + +private: + // Forbid 
copy-construction and assignment, to prevent double-deletion of mOwnedChild + expatppNesting( const expatppNesting & ); + expatppNesting & operator=( const expatppNesting & ); +}; + + +// inlines + +// ------------------------------------------------------- +// e x p a t p p +// ------------------------------------------------------- +inline +expatpp::operator XML_Parser() const +{ + return mParser; +} + + +// ------------------------------------------------------- +// e x p a t p p N e s t i n g +// ------------------------------------------------------- +inline void +expatppNesting::OwnedChildOrphansItself(expatppNesting* callingChild) +{ + assert(callingChild==mOwnedChild); + mOwnedChild = 0; +} + + + +#endif // H_EXPATPP -- 1.7.11.7