Sophie

Sophie

distrib > Mageia > 7 > armv7hl > media > core-release > by-pkgid > 795bf9786a4525b11afca6ed2c26e2af > files > 61

python-odf-doc-1.4.0-1.mga7.noarch.rpm

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2007 Søren Roug, European Environment Agency
#
# This is free software.  You may redistribute it under the terms
# of the Apache license and the GNU General Public License Version
# 2 or at your option any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
#
# Contributor(s):
#
#
# Syntax Highlighting
# Originally from Peter Clive Wilkinson (http://www.petersblog.org/node/763)
#
import getopt
import io
import os
import re
import sys

from odf.opendocument import OpenDocumentText
from odf.style import FontFace, Style, TextProperties, ParagraphProperties
from odf.text import P, Span, S

class Highlight:
    """
    Do syntax highlighting.

    The input text is tokenised with the per-language regular expression
    tables in ``oStyles`` and written into an OpenDocument text document:
    one paragraph per source line, one span per run of equally styled tokens.
    """

    courierfont = FontFace(name="Courier", fontfamily="Courier",
            fontadornments="Normal", fontfamilygeneric="modern", fontpitch="fixed")

    #--- Paragraph style --

    programliststyle = Style(name="Program Listing", family="paragraph")
    programliststyle.addElement(ParagraphProperties(border="0.002cm solid #000000", margin="0cm", padding="0.2cm"))
    programliststyle.addElement(TextProperties(fontname="Courier", fontsize="9pt", language="none", country="none"))

    #--- Text styles --
    puncstyle = Style(name="Highlighted Punctuation", family="text")
    puncstyle.addElement(TextProperties(fontweight="bold")) # Bold

    numberstyle = Style(name="Highlighted Number", family="text")
    numberstyle.addElement(TextProperties(color="#ff0000")) # Red

    keywordstyle = Style(name="Highlighted Keyword", family="text")
    keywordstyle.addElement(TextProperties(color="#b218b2", fontweight="bold")) # Purple, bold

    variablestyle = Style(name="Highlighted Magic", family="text")
    variablestyle.addElement(TextProperties(color="#0000ff")) # Blue

    tagstyle = Style(name="Highlighted Tag", family="text")
    tagstyle.addElement(TextProperties(color="#800000")) # Darkred

    # The attribute style needs its own name: it used to be registered as
    # "Highlighted Tag" as well, which collided with tagstyle above.
    attrstyle = Style(name="Highlighted Attribute", family="text")
    attrstyle.addElement(TextProperties(color="#008000", fontweight="bold")) # Dark green bold

    stringstyle = Style(name="Highlighted String", family="text")
    stringstyle.addElement(TextProperties(color="#800000")) # Darkred

    commentstyle = Style(name="Highlighted Comment", family="text")
    commentstyle.addElement(TextProperties(color="#0000ff", fontstyle="italic")) # Blue, Italic

    preprocstyle = Style(name="Highlighted Preprocessing", family="text")
    preprocstyle.addElement(TextProperties(color="#ff00ff", fontstyle="italic")) # Magenta, Italic

    def __init__(self, strMode):
        """
        Initialise highlighter: strMode = language (PYTHON, C, CPP, PHP, HTML)

        'CPP' uses the 'C' token table with every token enabled; plain 'C'
        uses the same table but suppresses the CPPKEYWORD token.
        """
        # NOTE(review): the font and style elements are class attributes, so
        # every Highlight instance adds the same objects to its document --
        # confirm this is acceptable before creating several instances.
        self.textdoc = OpenDocumentText()

        self.textdoc.fontfacedecls.addElement(self.courierfont)

        for oStyle in (self.programliststyle, self.puncstyle, self.numberstyle,
                       self.keywordstyle, self.variablestyle, self.tagstyle,
                       self.attrstyle, self.stringstyle, self.commentstyle,
                       self.preprocstyle):
            self.textdoc.styles.addElement(oStyle)

        # Style of the span currently being written (None = unstyled text
        # written directly into the paragraph).
        self.strSpanStyle = None
        self.currPara = P(stylename=self.programliststyle)
        self.textdoc.text.addElement(self.currPara)
        self.currSpan = None

        # Delimiter that opened the multi-line string being read, if any.
        self.strMultilineString = ''

        if strMode == 'CPP':
            strMode = 'C'
            self.strSuppressTokens = []
        elif strMode == 'C':
            self.strSuppressTokens = ['CPPKEYWORD']
        else:
            self.strSuppressTokens = []

        self.strMode = strMode

    def _WriteToken(self, strValue, strStyle):
        """
        Switch to strStyle and write the text of one token.

        A 'NewPara' token only starts a new paragraph; its raw newline is not
        written as text.  Any other token that contains newlines is split so
        that every source line ends up in a paragraph of its own.
        """
        self.ChangeStyle(strStyle)
        if strStyle == 'NewPara':
            return
        if '\n' in strValue:
            self.WriteMultiline(strValue)
        else:
            self.WriteContent(strValue)

    def PythonHighlightToken(self, strTok, oMatch, strStyle):
        """
        Callback for python specific highlighting.

        Returns the name of the mode to switch to, or None to stay in the
        current mode.
        """
        self._WriteToken(oMatch.group(), strStyle)

        if strTok == 'MULTILINESTRING':
            #
            # Remember you are in a string and remember how it was
            # started (""" vs ''')
            #
            self.strMultilineString = oMatch.group(1)
            return 'PythonMultilineString'

        if strTok == 'ENDMULTILINESTRING' and oMatch.group(1) == self.strMultilineString:
            #
            # The delimiter matches the one that opened the string, so the
            # string ends here.  The other delimiter kind is just content.
            #
            self.strMultilineString = ''
            return 'PYTHON'

        return None

    def CHighlightToken(self, strTok, oMatch, strStyle):
        """
        Callback for C specific highlighting.

        C has no sub-modes, so this always returns None.
        """
        self._WriteToken(oMatch.group(), strStyle)
        return None

    def PHPHighlightToken(self, strTok, oMatch, strStyle):
        """
        Callback for PHP specific highlighting.

        Returns the name of the mode to switch to, or None to stay in the
        current mode.
        """
        self._WriteToken(oMatch.group(), strStyle)

        if strTok == 'MULTILINESTRING':
            #
            # Start of a heredoc: remember its terminating identifier.
            #
            self.strMultilineString = oMatch.group(1)
            return 'PHPMultilineString'

        if strTok == 'ENDMULTILINESTRING':
            if oMatch.group(1) == self.strMultilineString:
                #
                # Identifier at the start of a line matches the heredoc
                # identifier, so the string ends here.
                #
                self.strMultilineString = ''
                return 'PHP'
            return None

        if strTok == 'GOTOHTML':
            #
            # Embedded HTML
            #
            return 'HTML'

        return None

    def HTMLHighlightToken(self, strTok, oMatch, strStyle):
        """
        Callback for HTML specific highlighting.

        Returns the name of the mode to switch to, or None to stay in the
        current mode.
        """
        self._WriteToken(oMatch.group(), strStyle)

        if strTok == 'TAG':
            #
            # Change to mode 1, 'within tag'.
            #
            return 'HTMLTag'

        if strTok == 'ENDTAG':
            #
            # Back to mode 0, 'looking for tags'.
            #
            return 'HTML'

        if strTok == 'GOTOPHP':
            #
            # Embedded PHP
            #
            return 'PHP'

        #
        # No state change.
        #
        return None

    # mode name -> (callback, ordered token table).  Every table entry is
    # (token name, compiled pattern, style); the first pattern that matches
    # at the current position wins.  The style is a Style object, None for
    # unstyled text, 'Keep' to continue in the current style or 'NewPara'
    # to start a new paragraph.
    oStyles = {
        'PYTHON': ( PythonHighlightToken,
            (
                ('PUNC', re.compile( r'[-+*!|&^~/%\=<>\[\]{}(),.:]'), puncstyle),
                ('NUMBER', re.compile( r'0x[0-9a-fA-F]+|[+-]?\d+(\.\d+)?([eE][+-]\d+)?|\d+'), numberstyle),
                ('KEYWORD', re.compile( r'(def|class|break|continue|del|exec|finally|pass|' +
                                        r'print|raise|return|try|except|global|assert|lambda|' +
                                        r'yield|for|while|if|elif|else|and|in|is|not|or|import|' +
                                        r'from|True|False)(?![a-zA-Z0-9_])'), keywordstyle),
                # The lookahead stops identifiers such as "selfish" from being
                # split into a highlighted "self" followed by "ish".
                ('MAGIC', re.compile( r'(self|None)(?![a-zA-Z0-9_])'), variablestyle),
                ('MULTILINESTRING', re.compile( r'r?u?(\'\'\'|""")'), stringstyle),
                ('STRING', re.compile( r'r?u?\'(.*?)(?<!\\)\'|"(.*?)(?<!\\)"'), stringstyle),
                ('IDENTIFIER', re.compile( r'[a-zA-Z_][a-zA-Z0-9_]*'), None),
                ('COMMENT', re.compile( r'\#.*'), commentstyle),
                ('NEWLINE', re.compile( r'\r?\n'), 'NewPara'),
                ('WHITESPACE', re.compile( r'[   ]+'), 'Keep'),
            # if all else fails...
                ('UNKNOWN', re.compile( r'.'), None)
            )),

        'PythonMultilineString': ( PythonHighlightToken,
            (
                ('ENDMULTILINESTRING', re.compile( r'.*?("""|\'\'\')', re.DOTALL), stringstyle),
                ('UNKNOWN', re.compile( r'.'), 'Keep')
            )),

        'C': ( CHighlightToken,
            (
                ('COMMENT', re.compile( r'//.*\r?\n'), commentstyle),
                ('MULTILINECOMMENT', re.compile( r'/\*.*?\*/', re.DOTALL), commentstyle),
                ('PREPROCESSOR', re.compile( r'\s*#.*?[^\\]\s*\n', re.DOTALL), preprocstyle),
                ('PUNC', re.compile( r'[-+*!&|^~/%\=<>\[\]{}(),.:]'), puncstyle),
                ('NUMBER', re.compile( r'0x[0-9a-fA-F]+|[+-]?\d+(\.\d+)?([eE][+-]\d+)?|\d+'),
                                        numberstyle),
                ('KEYWORD', re.compile( r'(sizeof|int|long|short|char|void|' +
                                        r'signed|unsigned|float|double|' +
                                        r'goto|break|return|continue|asm|' +
                                        r'case|default|if|else|switch|while|for|do|' +
                                        r'struct|union|enum|typedef|' +
                                        r'static|register|auto|volatile|extern|const)(?![a-zA-Z0-9_])'), keywordstyle),
                ( 'CPPKEYWORD', re.compile( r'(class|private|protected|public|template|new|delete|' +
                                            r'this|friend|using|inline|export|bool|throw|try|catch|' +
                                            r'operator|typeid|virtual)(?![a-zA-Z0-9_])'), keywordstyle),
                ('STRING', re.compile( r'r?u?\'(.*?)(?<!\\)\'|"(.*?)(?<!\\)"'), stringstyle),
                ('IDENTIFIER', re.compile( r'[a-zA-Z_][a-zA-Z0-9_]*'), None),
                ('NEWLINE', re.compile( r'\r?\n'), 'NewPara'),
                ('WHITESPACE', re.compile( r'[   ]+'), 'Keep'),
                ('UNKNOWN', re.compile( r'.'), None)
            )),

        'PHP': ( PHPHighlightToken,
            (
                ('COMMENT', re.compile( r'//.*\r?\n'), commentstyle),
                ('MULTILINECOMMENT', re.compile( r'/\*.*?\*/', re.DOTALL), commentstyle),
                ('MULTILINESTRING', re.compile( r'<<<\s*([a-zA-Z0-9_]+)'), stringstyle),
                ('GOTOPHP', re.compile( r'<\?php'), stringstyle),
                ('PUNC', re.compile( r'[-+*!&|^~/%\=<>\[\]{}(),.:]'), puncstyle),
                ('NUMBER', re.compile( r'0x[0-9a-fA-F]+|[+-]?\d+(\.\d+)?([eE][+-]\d+)?|\d+'),
                                        numberstyle),
                ('KEYWORD', re.compile( r'(declare|else|enddeclare|endswitch|elseif|endif|if|switch|' +
                                        r'as|do|endfor|endforeach|endwhile|for|foreach|while|' +
                                        r'case|default|switch|function|return|break|continue|exit|' +
                                        r'var|const|boolean|bool|integer|int|real|double|float|string|' +
                                        r'array|object|NULL|extends|implements|instanceof|parent|self|' +
                                        r'include|require|include_once|require_once|new|true|false)(?![a-zA-Z0-9_])'), keywordstyle),

                ('STRING', re.compile( r'r?u?\'(.*?)(?<!\\)\'|"(.*?)(?<!\\)"'), stringstyle),
                ('VARIABLE', re.compile( r'\$[a-zA-Z_][a-zA-Z0-9_]*'), variablestyle),
                ('IDENTIFIER', re.compile( r'[a-zA-Z_][a-zA-Z0-9_]*'), None),
                ('WHITESPACE', re.compile( r'[   \r\n]+'), 'Keep'),
                ('GOTOHTML', re.compile( r'\?>'), stringstyle),
                ('UNKNOWN', re.compile( r'.'), None)
            )),

        'PHPMultilineString': ( PHPHighlightToken,
            (
                ('ENDMULTILINESTRING', re.compile( r'.*?\n([a-zA-Z0-9_]+)', re.DOTALL), stringstyle),
                ('UNKNOWN', re.compile( r'.*?(?!\n)'), 'Keep')
            )),

        'HTML': ( HTMLHighlightToken,
            # Mode 0: just look for tags
            (
                ('COMMENT', re.compile( r'<!--[^>]*-->|<!>'), commentstyle),
                ('XMLCRAP', re.compile( r'<![^>]*>'), preprocstyle),
                ('SCRIPT', re.compile( r'<script .*?</script>', re.IGNORECASE + re.DOTALL), tagstyle),
                ('TAG', re.compile( r'</?\s*[a-zA-Z0-9]+'), tagstyle),
                ('GOTOPHP', re.compile( r'<\?php'), stringstyle),
                ('NEWLINE', re.compile( r'\r?\n'), 'NewPara'),
                ('UNKNOWN', re.compile( r'[^<]*'), None)
            )),
            # Mode 1: within tags,
        'HTMLTag': ( HTMLHighlightToken,
            (
                ('ENDTAG', re.compile( r'>'), tagstyle),
                ('ATTRIBUTE', re.compile( r'[a-zA-Z][a-zA-Z0-9:]*='), attrstyle),
                ('VALUE', re.compile( r'"[^"]*"'), stringstyle),
                ('NEWLINE', re.compile( r'\r?\n'), 'NewPara'),
                ('WHITESPACE', re.compile( r'[ \t\f\v]+'), None),
                ('UNKNOWN', re.compile( r'.'), None)
            ))
    }

    def generatedoc(self, strData):
        """
        Syntax highlight the source text in strData.

        The text is tokenised with the table of the current mode; the mode
        callbacks may switch tables while the input is consumed.
        Returns the OpenDocumentText document holding the highlighted code.
        Raises KeyError if the mode given to the constructor has no table.
        """
        i = 0
        nLength = len(strData)
        strMode = self.strMode

        #
        # While input is not exhausted...
        #
        while i < nLength:
            fnCallback, oTokens = Highlight.oStyles[strMode]
            #
            # Compare current position with all possible display types.
            #
            for strTok, oRE, strStyle in oTokens:
                if strTok in self.strSuppressTokens:
                    continue
                oMatch = oRE.match(strData, i)
                #
                # Some catch-all patterns can match the empty string.  Such a
                # match consumes nothing, so accepting it would loop forever;
                # treat it as "no match" instead.
                #
                if oMatch is None or oMatch.end() == i:
                    continue
                strNewMode = fnCallback(self, strTok, oMatch, strStyle)
                if strNewMode is not None:
                    strMode = strNewMode
                i = oMatch.end()
                break
            else:
                #
                # Token not found so dump out raw text. This doesn't have to be bullet proof.
                #
                self._WriteToken(strData[i], None)
                i += 1
        #
        # Terminate any styles in use.
        #
        self.ChangeStyle(None)

        return self.textdoc

    def WriteSingleline(self, parent, data):
        """
        Append data (text without line breaks) to the element parent.

        Each run of spaces is written as one literal space followed, for
        runs longer than one, by a <text:s> element carrying the remaining
        count.  Everything else is added as plain text.
        """
        # The capturing group makes re.split return the runs of spaces too.
        for strChunk in re.split(r'( +)', data):
            if not strChunk:
                continue
            if strChunk[0] == ' ':
                parent.addText(' ')
                if len(strChunk) > 1:
                    parent.addElement(S(c=len(strChunk) - 1))
            else:
                parent.addText(strChunk)

    def WriteMultiline(self, data):
        """
        Write text that may span several lines.

        The first line continues the current span (or paragraph); every
        following line gets a paragraph of its own, using the current span
        style when there is one.
        """
        lines = re.split(r'\r?\n', data)
        self.WriteContent(lines[0])
        for line in lines[1:]:
            self.currPara = P(stylename=self.programliststyle)
            self.textdoc.text.addElement(self.currPara)
            if self.strSpanStyle is None:
                # Unstyled text goes straight into the paragraph.
                self.currSpan = None
            else:
                self.currSpan = Span(stylename=self.strSpanStyle)
                self.currPara.addElement(self.currSpan)
            self.WriteContent(line)

    def WriteContent(self, data):
        """
        Write the content, but convert spaces to <text:s> first
        """
        if self.currSpan is None:
            self.WriteSingleline(self.currPara, data)
        else:
            self.WriteSingleline(self.currSpan, data)

    def ChangeStyle(self, strStyle):
        """
        Generate output to change from existing style to another style only.

        strStyle is a Style object, None (unstyled text), 'Keep' (leave the
        current style alone) or 'NewPara' (start a new, unstyled paragraph).
        """
        #
        # Output minimal formatting code: only output anything if the style has
        # actually changed.
        #
        if self.strSpanStyle == strStyle or strStyle == 'Keep':
            return

        if strStyle == 'NewPara':
            self.currPara = P(stylename=self.programliststyle)
            self.textdoc.text.addElement(self.currPara)
            self.currSpan = None
            self.strSpanStyle = None
            return

        if strStyle is None:
            self.currSpan = None
        else:
            self.currSpan = Span(stylename=strStyle)
            self.currPara.addElement(self.currSpan)
        self.strSpanStyle = strStyle

def usage():
    """Write the command-line synopsis of this script to standard error."""
    synopsis = "Usage: %s [-l language] [-e encoding] inputfile outputfile\n"
    sys.stderr.write(synopsis % sys.argv[0])

# Command-line driver: parse the options, pick the language, highlight the
# input file and save the resulting document.
try:
    opts, args = getopt.getopt(sys.argv[1:], "l:e:", ["language=", "encoding="])

except getopt.GetoptError:
    usage()
    sys.exit(2)

language = None
encoding = 'utf-8'
for o, a in opts:
    if o in ("-l", "--language"):
        language = a.upper()
    elif o in ("-e", "--encoding"):
        encoding = a

if len(args) != 2:
    usage()
    sys.exit(2)

# File name suffix (lower case) -> highlighting mode.
suffixes = {
    '.py': 'PYTHON',
    '.xhtml': 'HTML',
    '.html': 'HTML',
    '.htm': 'HTML',
    '.c': 'C',
    '.c++': 'CPP',
    '.cpp': 'CPP',
    '.cc': 'CPP',
    '.cxx': 'CPP',
    '.php': 'PHP'
}

inputfile, outputfile = args
if language is None:
    # Take the suffix from the lower-cased name so that FOO.PY is recognised
    # just like foo.py.
    lowered = inputfile.lower()
    dot = lowered.rfind('.')
    if dot != -1:
        language = suffixes.get(lowered[dot:])

# Reject a missing or unsupported language up front instead of failing with
# a KeyError in the middle of highlighting.
if language not in set(suffixes.values()):
    usage()
    sys.exit(2)

# io.open decodes while reading on both Python 2 and Python 3 (and, in its
# default text mode, normalises line endings to '\n'); the with-block makes
# sure the file is closed again.
with io.open(inputfile, encoding=encoding) as infile:
    data = infile.read()

Highlighted = Highlight(language).generatedoc(data)

Highlighted.save(outputfile)