diff -r 9971ff84a393 -r 5efd68af1819 sdext/source/pdfimport/wrapper/wrapper.cxx --- a/sdext/source/pdfimport/wrapper/wrapper.cxx Fri Feb 05 07:54:06 2010 +0100 +++ b/sdext/source/pdfimport/wrapper/wrapper.cxx Thu Feb 18 15:09:06 2010 +0100 @@ -201,6 +201,60 @@ void parseLine( const ::rtl::OString& rLine ); }; + +namespace +{ + + /** Unescapes line-ending characters in input string. These + characters are encoded as pairs of characters: '\\' 'n', resp. + '\\' 'r'. This function converts them back to '\n', resp. '\r'. + */ + rtl::OString lcl_unescapeLineFeeds(const rtl::OString& i_rStr) + { + const size_t nOrigLen(sal::static_int_cast<size_t>(i_rStr.getLength())); + const sal_Char* const pOrig(i_rStr.getStr()); + sal_Char* const pBuffer(new sal_Char[nOrigLen + 1]); + + const sal_Char* pRead(pOrig); + sal_Char* pWrite(pBuffer); + const sal_Char* pCur(pOrig); + while ((pCur = strchr(pCur, '\\')) != 0) + { + const sal_Char cNext(pCur[1]); + if (cNext == 'n' || cNext == 'r' || cNext == '\\') + { + const size_t nLen(pCur - pRead); + strncpy(pWrite, pRead, nLen); + pWrite += nLen; + *pWrite = cNext == 'n' ? '\n' : (cNext == 'r' ? '\r' : '\\'); + ++pWrite; + pCur = pRead = pCur + 2; + } + else + { + // Just continue on the next character. The current + // block will be copied the next time it goes through the + // 'if' branch. 
+ ++pCur; + } + } + // maybe there are some data to copy yet + if (sal::static_int_cast<size_t>(pRead - pOrig) < nOrigLen) + { + const size_t nLen(nOrigLen - (pRead - pOrig)); + strncpy(pWrite, pRead, nLen); + pWrite += nLen; + } + *pWrite = '\0'; + + rtl::OString aResult(pBuffer); + delete[] pBuffer; + return aResult; + } + +} + + ::rtl::OString Parser::readNextToken() { OSL_PRECOND(m_nCharIndex!=-1,"insufficient input"); @@ -326,7 +380,7 @@ readDouble(aUnoMatrix.m10); readDouble(aUnoMatrix.m11); - rtl::OString aChars = m_aLine.copy( m_nCharIndex ); + rtl::OString aChars = lcl_unescapeLineFeeds( m_aLine.copy( m_nCharIndex ) ); // chars gobble up rest of line m_nCharIndex = -1; @@ -480,7 +534,7 @@ readInt32(nFileLen); nSize = nSize < 0.0 ? -nSize : nSize; - aFontName = m_aLine.copy( m_nCharIndex ); + aFontName = lcl_unescapeLineFeeds( m_aLine.copy( m_nCharIndex ) ); // name gobbles up rest of line m_nCharIndex = -1; @@ -672,8 +726,9 @@ readDouble(aBounds.Y2); m_pSink->hyperLink( aBounds, - rtl::OStringToOUString( m_aLine.copy(m_nCharIndex), - RTL_TEXTENCODING_UTF8 )); + rtl::OStringToOUString( lcl_unescapeLineFeeds( + m_aLine.copy(m_nCharIndex) ), + RTL_TEXTENCODING_UTF8 ) ); // name gobbles up rest of line m_nCharIndex = -1; } @@ -710,7 +765,7 @@ OSL_PRECOND( m_pSink, "Invalid sink" ); OSL_PRECOND( m_pErr, "Invalid filehandle" ); OSL_PRECOND( m_xContext.is(), "Invalid service factory" ); - + m_nNextToken = 0; m_nCharIndex = 0; m_aLine = rLine; uno::Reference<rendering::XPolyPolygon2D> xPoly; const ::rtl::OString& rCmd = readNextToken(); diff -r 9971ff84a393 -r 5efd68af1819 sdext/source/pdfimport/xpdfwrapper/pdfioutdev_gpl.cxx --- a/sdext/source/pdfimport/xpdfwrapper/pdfioutdev_gpl.cxx Fri Feb 05 07:54:06 2010 +0100 +++ b/sdext/source/pdfimport/xpdfwrapper/pdfioutdev_gpl.cxx Thu Feb 18 15:09:06 2010 +0100 @@ -84,10 +84,150 @@ return fabs(val) < 0.0000001 ? 
namespace
{

/** Handles transfer of a string that may or may not have been allocated
    dynamically.

    Copy semantics is the same as for std::auto_ptr: the copy takes over
    ownership of the string. The source object keeps its pointer but no
    longer deletes it.
 */
class TemporaryString
{
public:
    /** @param i_pString
        the string to wrap

        @param i_bNeedsDelete
        true if this object shall release i_pString with delete[] when it
        is destroyed; false if the string is owned by somebody else
     */
    explicit TemporaryString(const char* i_pString, bool i_bNeedsDelete = false);
    TemporaryString(TemporaryString& io_rOther);
    TemporaryString& operator=(TemporaryString& io_rOther);
    ~TemporaryString();
    void swap(TemporaryString& io_rOther);

    /** Returns the contained string.
     */
    const char* getString() const;
private:
    const char* m_pString;
    bool m_bNeedsDelete;
};

TemporaryString::TemporaryString(const char* const i_pString, const bool i_bNeedsDelete)
    : m_pString(i_pString)
    , m_bNeedsDelete(i_bNeedsDelete)
{
}

TemporaryString::TemporaryString(TemporaryString& io_rOther)
    : m_pString(io_rOther.m_pString)
    , m_bNeedsDelete(io_rOther.m_bNeedsDelete)
{
    // ownership moves to the copy; the source must not delete any more
    io_rOther.m_bNeedsDelete = false;
}

TemporaryString& TemporaryString::operator=(TemporaryString& io_rOther)
{
    // take over io_rOther's string; whatever this object held before ends
    // up in aTmp and is released when aTmp goes out of scope
    TemporaryString aTmp(io_rOther);
    this->swap(aTmp);
    return *this;
}

void TemporaryString::swap(TemporaryString& io_rOther)
{
    const bool bTmp(io_rOther.m_bNeedsDelete);
    io_rOther.m_bNeedsDelete = this->m_bNeedsDelete;
    this->m_bNeedsDelete = bTmp;
    const char* const pTmp(io_rOther.m_pString);
    io_rOther.m_pString = this->m_pString;
    this->m_pString = pTmp;
}

TemporaryString::~TemporaryString()
{
    if (m_bNeedsDelete)
    {
        delete[] m_pString;
    }
}

const char* TemporaryString::getString() const
{
    return m_pString;
}


/** Escapes line-ending characters (\n and \r) and the backslash in input
    string, so that the result never contains a raw line break.

    '\n' becomes the two characters '\\' 'n', '\r' becomes '\\' 'r', and
    '\\' becomes '\\' '\\'. All other characters are copied unchanged.

    The function may allocate memory, but the deletion of it is handled
    automatically by the returned proxy object. The pointer obtained from
    getString() is only valid as long as that proxy object lives, e.g.

        printf( "%s", lcl_escapeLineFeeds(buf).getString() );

    @param i_pStr
    the NUL-terminated string to escape; must not be null

    @return a proxy holding either i_pStr itself (if nothing had to be
    escaped) or a newly allocated escaped copy
 */
TemporaryString lcl_escapeLineFeeds(const char* const i_pStr)
{
    const size_t nLength(strlen(i_pStr));
    const char* const pEscape = "\n\r\\";

    // Fast path: nothing to escape, so hand back the caller's string
    // without copying it and without taking ownership.
    if (strcspn(i_pStr, pEscape) == nLength)
    {
        TemporaryString aTmp(i_pStr);
        return aTmp;
    }

    // Each escaped character occupies exactly one additional char.
    size_t nEscapeCount(0);
    for (const char* pRead = i_pStr; *pRead != '\0'; ++pRead)
    {
        if (*pRead == '\n' || *pRead == '\r' || *pRead == '\\')
        {
            ++nEscapeCount;
        }
    }

    char* const pBuffer(new char[nLength + nEscapeCount + 1]);

    char* pWrite(pBuffer);
    for (const char* pRead = i_pStr; *pRead != '\0'; ++pRead)
    {
        switch (*pRead)
        {
            case '\n':
                *pWrite++ = '\\';
                *pWrite++ = 'n';
                break;
            case '\r':
                *pWrite++ = '\\';
                *pWrite++ = 'r';
                break;
            case '\\':
                // A backslash is doubled. Writing a NUL as second char
                // here would cut the output off at this point.
                *pWrite++ = '\\';
                *pWrite++ = '\\';
                break;
            default:
                *pWrite++ = *pRead;
                break;
        }
    }
    *pWrite = '\0';

    TemporaryString aRetVal(pBuffer, true);
    return aRetVal;
}

}