Sophie

Sophie

distrib > Mandriva > 2011.0 > x86_64 > media > contrib-testing > by-pkgid > c16f5c215103bf1c5b8460cc792c7cdd > files > 5

zarafa-indexer-7.0.8-1.x86_64.rpm

# Database file containing a map of mime-types and extensions together
# with the corresponding command to parse the attachment into plain-text.
#
# The layout of each line is as follows:
#
#		<mime-type>;<extension>		`<command>`
#
# Each line can have as many mime-types and extensions as needed.
#
# The command must assume the attachment data is coming through /dev/stdin
# and should return the parsed data through /dev/stdout. When a utility
# requires the attachment data to be stored on the hard disk, the command
# `zmktemp` can be used. That script writes all attachment data to a
# temporary file and prints the location of that file to /dev/stdout.
#
# To suppress log messages from the indexer regarding file types which should
# not be indexed (like images), the command `echo -n` can be used.
#
# The command will be detected and executed by the attachments_parser script.
# To test a command, run the following and check the result:
#
#		cat <attachment> | <command>
#
# Plain text: the attachment is passed through unchanged.
text/plain;txt	`cat /dev/stdin`
# HTML: zmktemp stores the attachment in a temporary file, which w3m then
# dumps as text with utf-8 output (-O utf-8).
# NOTE(review): per the w3m manual `-s` selects Shift_JIS while `-S` squeezes
# blank lines -- confirm which one was intended here.
text/html;html;htm	`FILE=$(zmktemp); w3m -dump -s -O utf-8 $FILE`
# XML: xsltproc applies xmltotext.xslt to the data on stdin ("-").
# ${SCRIPTPATH} is not defined in this file; presumably attachments_parser
# sets it -- TODO confirm.
application/xml;text/xml;xml	`xsltproc ${SCRIPTPATH}/xmltotext.xslt -`
# PDF: pdftotext converts the temporary file created by zmktemp and writes
# the text to /dev/stdout.
application/pdf;pdf	`pdftotext -enc UTF-8 -q -nopgbrk $(zmktemp) /dev/stdout`
# Legacy MS Office formats (doc, ppt, xls): catdoc, catppt and xls2csv read
# the attachment directly from /dev/stdin; all three are given `-s cp1252
# -d utf-8` (source and destination charset per the catdoc manual).
# NOTE(review): the doc entry separates the type list from the command with a
# space where most entries use a tab -- confirm attachments_parser accepts both.
application/msword;doc `catdoc -s cp1252 -d utf-8 -f ascii -w /dev/stdin`
application/mspowerpoint;application/powerpoint;application/x-mspowerpoint;application/vnd.ms-powerpoint;ppt	`catppt -s cp1252 -d utf-8 /dev/stdin`
application/excel;application/x-excel;application/x-msexcel;application/vnd.ms-excel;xls	`xls2csv -s cp1252 -d utf-8 -c ' ' /dev/stdin`
# Zip-based formats (Office Open XML and OpenDocument). In every entry zmktemp
# prints the location of the temporary file, and `$(cat /dev/stdin)` reads
# that location from the pipe so it can be handed to unzip as the archive.
#
# docx: only word/document.xml is extracted (to stdout, -p) and converted
# with xmltotext.xslt.
application/vnd.openxmlformats-officedocument.wordprocessingml.document;docx	`zmktemp | unzip -p $(cat /dev/stdin) word/document.xml | xsltproc ${SCRIPTPATH}/xmltotext.xslt -`
# xlsx / pptx: the whole archive is unpacked into $TMPDIR, then every *.xml
# file that find locates under $TMPDIR is converted with xmltotext.xslt.
# NOTE(review): $TMPDIR is not defined in this file; presumably it is supplied
# by attachments_parser and is private to this attachment -- confirm, since
# find would also pick up any other *.xml file already present there.
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;xlsx	`zmktemp | unzip -o -qq $(cat /dev/stdin) -d $TMPDIR; for i in $(find $TMPDIR -name \*.xml); do xsltproc ${SCRIPTPATH}/xmltotext.xslt $i; done`
application/vnd.openxmlformats-officedocument.presentationml.presentation;pptx	`zmktemp | unzip -o -qq $(cat /dev/stdin) -d $TMPDIR; for i in $(find $TMPDIR -name \*.xml); do xsltproc ${SCRIPTPATH}/xmltotext.xslt $i; done`
# odt / ods / odp: only content.xml is extracted (to stdout, -p) and converted
# with xmltotext.xslt.
# NOTE(review): the odt entry separates the type list from the command with a
# space where most entries use a tab -- confirm attachments_parser accepts both.
application/vnd.oasis.opendocument.text;odt `zmktemp | unzip -p $(cat /dev/stdin) content.xml | xsltproc ${SCRIPTPATH}/xmltotext.xslt -`
application/vnd.oasis.opendocument.spreadsheet;ods	`zmktemp | unzip -p $(cat /dev/stdin) content.xml | xsltproc ${SCRIPTPATH}/xmltotext.xslt -`
application/vnd.oasis.opendocument.presentation;odp	`zmktemp | unzip -p $(cat /dev/stdin) content.xml | xsltproc ${SCRIPTPATH}/xmltotext.xslt -`
# Images are not indexed: `echo -n` produces no text and, as described in the
# header of this file, keeps the indexer from logging these file types.
image/gif;image/jpeg;image/png;image/tiff;gif;jpg;jpeg;png;tif;tiff	`echo -n`

# Fallback if the mime-type was not specified (application/octet-stream).
# The attachment is stored in a temporary file by zmktemp, `file --mime-type`
# is asked for the type, awk takes the second field of its output, and
# attachments_parser is invoked again with that type and the temporary file.
# If the detected type is application/octet-stream again, sed replaces it
# with `stop_loop`; presumably this keeps attachments_parser from selecting
# this same entry over and over -- confirm how attachments_parser treats it.
# NOTE(review): this entry separates the type from the command with a space
# where most entries use a tab -- confirm attachments_parser accepts both.
application/octet-stream `FILE=$(zmktemp); attachments_parser $(file --mime-type $FILE | awk '{print $2}' | sed -e 's%application/octet-stream%stop_loop%') $FILE`