#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (C) 2007 Søren Roug, European Environment Agency
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#
# Contributor(s):
#
"""Convert a Project Gutenberg plain-text e-book to an OpenDocument text file.

The input is read line by line.  Everything up to the "*** START OF TH..."
(or "*END THE SMALL PRINT!") marker goes into a grey "preamble" section,
everything from "*** END OF TH..." onwards goes into a grey "license"
section, and the lines in between are turned into headings and justified
paragraphs.  Text between underscores is emitted as emphasis, and straight
quotes, apostrophes and double dashes are replaced by typographic characters.
"""
import sys
import getopt
import time

from odf.opendocument import OpenDocumentText
from odf import style, text, dc, meta


def usage():
    """Write the command-line synopsis to standard error."""
    sys.stderr.write(
        "Usage: %s [-l language] [-e encoding] [-T] [-a author]\n"
        "\t[-c creation_date] [-d description] [-n etext] [-p publisher]"
        " [-t title] inputfile\n" % sys.argv[0])


try:
    opts, args = getopt.getopt(
        sys.argv[1:], "a:n:c:d:e:l:p:t:T",
        ["author=", "date=", "created=", "description=", "number=",
         "title=", "language=", "publisher=", "encoding="])
except getopt.GetoptError:
    usage()
    sys.exit(2)

language = None
description = None
encoding = 'cp1252'   # Codepage 1252 is a superset of ASCII and ISO-8859-1
argencoding = 'utf-8'  # Encoding assumed for command-line arguments
creator = ""
creationdate = None
title = ""
ebooknum = None
publisher = "Project Gutenberg"
copyrights = "http://www.gutenberg.org/license"
fn_is_title = False


def _decode_arg(value):
    """Return a command-line argument as text.

    Byte strings (what Python 2 puts in sys.argv) are decoded with
    ``argencoding``; values that are already text are returned unchanged,
    so the script also runs where ``unicode`` does not exist.
    """
    if isinstance(value, bytes):
        return value.decode(argencoding)
    return value


for o, a in opts:
    if o in ("-l", "--language"):
        if (len(a) > 3 and a[2] != '-' and a[3] != '-') or len(a) > 6:
            sys.stderr.write(
                "Language must be a two or three letter language code"
                " optionally\n"
                "\tfollowed by a hyphen and a two-letter country code\n")
            sys.exit(2)
        language = a
    elif o in ("-e", "--encoding"):
        encoding = a
    elif o in ("-a", "--author"):
        creator = _decode_arg(a)
    elif o in ("-d", "--description"):
        description = _decode_arg(a)
    elif o in ("-c", "--date", "--created"):
        if len(a) > 10 and a[10] != "T":
            sys.stderr.write(
                "Date must be in ISO8601 format (YYYY-MM-DDTHH:MM:SS)\n")
            sys.exit(2)
        # Both separators must be hyphens; rejecting only when *both* are
        # wrong would let values such as "2007x01-01" through.
        if len(a) < 10 or a[4] != "-" or a[7] != "-":
            sys.stderr.write("Date must be in ISO8601 format (YYYY-MM-DD)\n")
            sys.exit(2)
        creationdate = a
    elif o in ("-p", "--publisher"):
        publisher = _decode_arg(a)
    elif o in ("-n", "--number"):
        ebooknum = _decode_arg(a)
    elif o in ("-t", "--title"):
        title = _decode_arg(a)
    elif o == "-T":
        fn_is_title = True

if len(args) != 1:
    usage()
    sys.exit(2)

doc = OpenDocumentText()
textdoc = doc.text

# Document metadata
if creator != "":
    doc.meta.addElement(meta.InitialCreator(text=creator))
    doc.meta.addElement(dc.Creator(text=creator))
if creationdate is not None:
    doc.meta.addElement(meta.CreationDate(text=creationdate))
    doc.meta.addElement(dc.Date(text=creationdate))
if description is not None:
    doc.meta.addElement(dc.Description(text=description))
if title != "":
    doc.meta.addElement(dc.Title(text=title))
if language is not None:
    doc.meta.addElement(dc.Language(text=language))
if publisher is not None:
    # doc.meta.addElement(dc.Publisher(text=publisher))
    doc.meta.addElement(meta.UserDefined(name="Publisher", text=publisher))
if copyrights is not None:
    # doc.meta.addElement(dc.Rights(text=copyrights))
    doc.meta.addElement(meta.UserDefined(name="Rights", text=copyrights))
if ebooknum is not None:
    doc.meta.addElement(meta.UserDefined(name="EText", text=ebooknum))

arial = style.FontFace(name="Arial", fontfamily="Arial",
                       fontfamilygeneric="swiss", fontpitch="variable")
doc.fontfacedecls.addElement(arial)

# Paragraph styles
standardstyle = style.Style(name="Standard", family="paragraph")
standardstyle.addElement(
    style.ParagraphProperties(marginbottom="0cm", margintop="0cm"))
doc.styles.addElement(standardstyle)

h1style = style.Style(name="Heading 1", family="paragraph",
                      defaultoutlinelevel="1")
h1style.addElement(
    style.TextProperties(attributes={'fontsize': "20pt",
                                     'fontweight': "bold"}))
doc.styles.addElement(h1style)

textbodystyle = style.Style(name="Text body", family="paragraph",
                            parentstylename=standardstyle)
textbodystyle.addElement(
    style.ParagraphProperties(attributes={'marginbottom': "0.212cm",
                                          'margintop': "0cm",
                                          'textalign': "justify",
                                          'justifysingleword': "false"}))
doc.styles.addElement(textbodystyle)

subtitlestyle = style.Style(name="Subtitle", family="paragraph",
                            nextstylename=textbodystyle)
subtitlestyle.addElement(style.ParagraphProperties(textalign="center"))
subtitlestyle.addElement(
    style.TextProperties(fontsize="14pt", fontstyle="italic",
                         fontname="Arial"))
doc.styles.addElement(subtitlestyle)

titlestyle = style.Style(name="Title", family="paragraph",
                         nextstylename=subtitlestyle)
titlestyle.addElement(style.ParagraphProperties(textalign="center"))
titlestyle.addElement(
    style.TextProperties(fontsize="18pt", fontweight="bold",
                         fontname="Arial"))
doc.styles.addElement(titlestyle)

# Text styles
emphasisstyle = style.Style(name="Emphasis", family="text")
emphasisstyle.addElement(style.TextProperties(fontstyle="italic"))
doc.styles.addElement(emphasisstyle)

# Make the Gutenberg sections grey
sectstyle = style.Style(name="Sect1", family="section")
sectstyle.addElement(style.SectionProperties(backgroundcolor="#e6e6e6"))
doc.automaticstyles.addElement(sectstyle)

# Lines at least this long are treated as the middle of a paragraph.
FULLLINE = 55

# Lines of the paragraph currently being collected by mainpart().
paragraph = []

# Typographic replacements used by cleantext().
LEFT_DQUOTE = u'\u201c'
RIGHT_DQUOTE = u'\u201d'
APOSTROPHE = u'\u2019'
EM_DASH = u'\u2014'


def addparagraph(section):
    """Join the collected paragraph lines and add them to ``section``.

    The lines are joined with single spaces.  When the joined text holds
    an even number of underscores (and at least two), the segments between
    each underscore pair are added as emphasised spans; otherwise the text
    is added unchanged, underscores included.
    """
    p = ' '.join(paragraph)
    textsegs = p.split('_')
    para = text.P(stylename=textbodystyle)
    section.addElement(para)
    if len(textsegs) > 1 and (len(textsegs) % 2) == 1:
        # Odd-numbered segments sit between a pair of underscores.
        for i, seg in enumerate(textsegs):
            if not seg:
                continue
            if (i % 2) == 1:
                para.addElement(text.Span(stylename=emphasisstyle, text=seg))
            else:
                para.addText(seg)
    else:
        para.addText(p)


def flushparagraph(section):
    """Emit the paragraph being collected, if any, and start a new one."""
    global paragraph
    if paragraph:
        addparagraph(section)
        paragraph = []


def cleantext(s):
    """Return ``s`` with ASCII punctuation replaced by typographic forms.

    Straight double quotes at the ends of the line or next to a space
    become curly quotes, apostrophes in common English and French
    contractions become a right single quote, and ``--`` becomes an em
    dash unless the line contains ``---`` (a drawn line).  An empty string
    is returned unchanged.
    """
    if not s:
        return s
    if s[0] == '"':
        s = LEFT_DQUOTE + s[1:]
    if s[-1] == '"':
        s = s[:-1] + RIGHT_DQUOTE
    s = s.replace('" ', RIGHT_DQUOTE + u' ')
    s = s.replace(' "', u' ' + LEFT_DQUOTE)
    s = s.replace("'m", APOSTROPHE + u"m")   # I'm
    s = s.replace("'s", APOSTROPHE + u"s")   # genitive case
    s = s.replace("'t", APOSTROPHE + u"t")   # don't, doesn't, haven't
    s = s.replace("'S", APOSTROPHE + u"S")   # genitive case
    s = s.replace("'T", APOSTROPHE + u"T")   # DON'T, etc
    s = s.replace("l'", u"l" + APOSTROPHE)   # French
    s = s.replace("d'", u"d" + APOSTROPHE)   # French
    if s.find('---') < 0:  # Don't replace double dash for lines
        s = s.replace('--', EM_DASH)
    return s


def pretext(section, line, linelen):
    """Add a preamble line verbatim as a Standard paragraph."""
    section.addElement(text.P(stylename=standardstyle, text=line))


def posttext(section, line, linelen):
    """Add a license line verbatim as a Standard paragraph."""
    section.addElement(text.P(stylename=standardstyle, text=line))


def mainpart(section, line, linelen):
    """Classify one line of the book body and add it to ``section``.

    An all-upper-case line outside a paragraph becomes a level 1 heading.
    Lines of at least FULLLINE characters, and shorter lines that follow
    them, are collected into the current paragraph, which an empty line
    ends.  A short line outside a paragraph becomes a paragraph of its own,
    styled as title or subtitle when it matches the title or author.
    """
    if linelen > 0 and len(paragraph) == 0 and \
            line.upper() == line and line.upper() != line.lower():
        # Headlines are always upper case
        section.addElement(text.H(outlinelevel=1, stylename=h1style,
                                  text=cleantext(line)))
    elif linelen >= FULLLINE:
        # In the middle of a paragraph
        paragraph.append(cleantext(line))
    elif linelen == 0:
        # End of paragraph
        flushparagraph(section)
    elif linelen < FULLLINE and len(paragraph) > 0:
        # Short tail of paragraph
        paragraph.append(cleantext(line))
    else:
        # Short line with no paragraph in progress.
        if line == title or line == title + " by " + creator:
            section.addElement(text.P(stylename=titlestyle,
                                      text=cleantext(line)))
            return
        if line == "by" or line == creator:
            section.addElement(text.P(stylename=subtitlestyle,
                                      text=cleantext(line)))
            return
        section.addElement(text.P(stylename=textbodystyle,
                                  text=cleantext(line)))


PRETEXT = 1
MAINPART = 2
POSTTEXT = 3

textpart = PRETEXT  # Start in the preamble
section = text.Section(stylename=sectstyle, name="preamble")  #, display="none")
textdoc.addElement(section)

filename = args[0]
if fn_is_title and title is not None and title != "":
    outfn = title
else:
    suffixi = filename.rfind(".")
    if suffixi > 1:
        outfn = filename[:suffixi]
    else:
        outfn = "interimname"

# Read bytes and decode each line explicitly with the requested encoding;
# the ``with`` block guarantees the input file is closed.
with open(filename, 'rb') as f:
    for rawline in f:
        line = rawline.strip().decode(encoding)
        linelen = len(line)
        if line.startswith("*** END OF TH"):
            # Do not lose a paragraph that runs right up to the end marker.
            flushparagraph(textdoc)
            textpart = POSTTEXT
            section = text.Section(stylename=sectstyle, name="license")  #, display="none")
            textdoc.addElement(section)

        if textpart == PRETEXT:
            pretext(section, line, linelen)
            if line.startswith(("*** START OF TH",
                                "*END THE SMALL PRINT!",
                                "*END*THE SMALL PRINT!")):
                textpart = MAINPART
        elif textpart == MAINPART:
            section = textdoc
            mainpart(section, line, linelen)
        else:
            posttext(section, line, linelen)

# A file that ends without a blank line or end marker may still hold text.
flushparagraph(textdoc)

doc.save(outfn, True)