diff -Nrup a/data/scripts/mobi_lib/mobi_dict.py b/data/scripts/mobi_lib/mobi_dict.py --- a/data/scripts/mobi_lib/mobi_dict.py 2019-08-10 20:20:51.000000000 +0200 +++ b/data/scripts/mobi_lib/mobi_dict.py 2021-02-01 19:06:04.389959908 +0100 @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/python3 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai DEBUG_DICT = False @@ -27,37 +27,37 @@ class dictSupport: decodeInflection = True if metaOrthIndex != 0xFFFFFFFF: - print "Info: Document contains orthographic index, handle as dictionary" + print("Info: Document contains orthographic index, handle as dictionary") if metaInflIndex == 0xFFFFFFFF: decodeInflection = False else: metaInflIndexData = sect.loadSection(metaInflIndex) metaIndexCount, = struct.unpack_from('>L', metaInflIndexData, 0x18) if metaIndexCount != 1: - print "Error: Dictionary contains multiple inflection index sections, which is not yet supported" + print("Error: Dictionary contains multiple inflection index sections, which is not yet supported") decodeInflection = False inflIndexData = sect.loadSection(metaInflIndex + 1) inflNameData = sect.loadSection(metaInflIndex + 1 + metaIndexCount) tagSectionStart, = struct.unpack_from('>L', metaInflIndexData, 0x04) inflectionControlByteCount, inflectionTagTable = readTagSection(tagSectionStart, metaInflIndexData) if DEBUG_DICT: - print "inflectionTagTable: %s" % inflectionTagTable + print("inflectionTagTable: %s" % inflectionTagTable) if self.hasTag(inflectionTagTable, 0x07): - print "Error: Dictionary uses obsolete inflection rule scheme which is not yet supported" + print("Error: Dictionary uses obsolete inflection rule scheme which is not yet supported") decodeInflection = False data = sect.loadSection(metaOrthIndex) tagSectionStart, = struct.unpack_from('>L', data, 0x04) controlByteCount, tagTable = readTagSection(tagSectionStart, data) orthIndexCount, = struct.unpack_from('>L', data, 0x18) - print "orthIndexCount is", orthIndexCount + print("orthIndexCount 
is", orthIndexCount) if DEBUG_DICT: - print "orthTagTable: %s" % tagTable + print("orthTagTable: %s" % tagTable) hasEntryLength = self.hasTag(tagTable, 0x02) if not hasEntryLength: - print "Info: Index doesn't contain entry length tags" + print("Info: Index doesn't contain entry length tags") - print "Read dictionary index data" + print("Read dictionary index data") for i in range(metaOrthIndex + 1, metaOrthIndex + 1 + orthIndexCount): data = sect.loadSection(i) idxtPos, = struct.unpack_from('>L', data, 0x14) @@ -145,10 +145,10 @@ class dictSupport: # Make sure that the required tags are available. if 0x05 not in tagMap: - print "Error: Required tag 0x05 not found in tagMap" + print("Error: Required tag 0x05 not found in tagMap") return "" if 0x1a not in tagMap: - print "Error: Required tag 0x1a not found in tagMap" + print("Error: Required tag 0x1a not found in tagMap") return "" result += "<idx:infl>" @@ -230,7 +230,7 @@ class dictSupport: totalConsumed += consumed values.append(data) if totalConsumed != valueBytes: - print "Error: Should consume %s bytes, but consumed %s" % (valueBytes, totalConsumed) + print("Error: Should consume %s bytes, but consumed %s" % (valueBytes, totalConsumed)) tagHashMap[tag] = values # Test that all bytes have been processed if endPos is given. @@ -238,12 +238,12 @@ class dictSupport: # The last entry might have some zero padding bytes, so complain only if non zero bytes are left. 
for char in entryData[dataStart:endPos]: if char != chr(0x00): - print "Warning: There are unprocessed index bytes left: %s" % toHex(entryData[dataStart:endPos]) + print("Warning: There are unprocessed index bytes left: %s" % toHex(entryData[dataStart:endPos])) if DEBUG_DICT: - print "controlByteCount: %s" % controlByteCount - print "tagTable: %s" % tagTable - print "data: %s" % toHex(entryData[startPos:endPos]) - print "tagHashMap: %s" % tagHashMap + print("controlByteCount: %s" % controlByteCount) + print("tagTable: %s" % tagTable) + print("data: %s" % toHex(entryData[startPos:endPos])) + print("tagHashMap: %s" % tagHashMap) break return tagHashMap @@ -273,10 +273,10 @@ class dictSupport: position -= offset elif byte > 0x13: if mode == -1: - print "Error: Unexpected first byte %i of inflection rule" % byte + print("Error: Unexpected first byte %i of inflection rule" % byte) return None elif position == -1: - print "Error: Unexpected first byte %i of inflection rule" % byte + print("Error: Unexpected first byte %i of inflection rule" % byte) return None else: if mode == 0x01: @@ -292,19 +292,19 @@ class dictSupport: deleted = byteArray.pop(position) if deleted != char: if DEBUG_DICT: - print "0x03: %s %s %s %s" % (mainEntry, toHex(inflectionRuleData[start:end]), char, deleted) - print "Error: Delete operation of inflection rule failed" + print("0x03: %s %s %s %s" % (mainEntry, toHex(inflectionRuleData[start:end]), char, deleted)) + print("Error: Delete operation of inflection rule failed") return None elif mode == 0x04: # Delete at word start deleted = byteArray.pop(position) if deleted != char: if DEBUG_DICT: - print "0x03: %s %s %s %s" % (mainEntry, toHex(inflectionRuleData[start:end]), char, deleted) - print "Error: Delete operation of inflection rule failed" + print("0x03: %s %s %s %s" % (mainEntry, toHex(inflectionRuleData[start:end]), char, deleted)) + print("Error: Delete operation of inflection rule failed") return None else: - print "Error: Inflection 
rule mode %x is not implemented" % mode + print("Error: Inflection rule mode %x is not implemented" % mode) return None elif byte == 0x01: # Insert at word start @@ -327,7 +327,7 @@ class dictSupport: position = 0 mode = byte else: - print "Error: Inflection rule mode %x is not implemented" % byte + print("Error: Inflection rule mode %x is not implemented" % byte) return None return byteArray.tostring() diff -Nrup a/data/scripts/mobi_lib/mobi_html.py b/data/scripts/mobi_lib/mobi_html.py --- a/data/scripts/mobi_lib/mobi_html.py 2019-08-10 20:20:51.000000000 +0200 +++ b/data/scripts/mobi_lib/mobi_html.py 2021-02-01 19:06:14.185959247 +0100 @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/python3 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai @@ -23,7 +23,7 @@ class HTMLProcessor: def findAnchors(self, rawtext, indx_data, positionMap): # process the raw text # find anchors... - print "Find link anchors" + print("Find link anchors") link_pattern = re.compile(r'''<[^<>]+filepos=['"]{0,1}(\d+)[^<>]*>''', re.IGNORECASE) # TEST NCX: merge in filepos from indx pos_links = [int(m.group(1)) for m in link_pattern.finditer(rawtext)] @@ -38,7 +38,7 @@ class HTMLProcessor: positionMap[position] = '<a id="filepos%d" />' % position # apply dictionary metadata and anchors - print "Insert data into html" + print("Insert data into html") pos = 0 lastPos = len(rawtext) dataList = [] @@ -63,7 +63,7 @@ class HTMLProcessor: metadata = self.metadata # put in the hrefs - print "Insert hrefs into html" + print("Insert hrefs into html") # Two different regex search and replace routines. # Best results are with the second so far IMO (DiapDealer). 
@@ -73,11 +73,11 @@ class HTMLProcessor: srctext = link_pattern.sub(r'''<a href="#filepos\1"\2>''', srctext) # remove empty anchors - print "Remove empty anchors from html" + print("Remove empty anchors from html") srctext = re.sub(r"<a/>",r"", srctext) # convert image references - print "Insert image references into html" + print("Insert image references into html") # split string into image tag pieces and other pieces image_pattern = re.compile(r'''(<img.*?>)''', re.IGNORECASE) image_index_pattern = re.compile(r'''recindex=['"]{0,1}([0-9]+)['"]{0,1}''', re.IGNORECASE) @@ -91,7 +91,7 @@ class HTMLProcessor: imageNumber = int(m.group(1)) imageName = imgnames[imageNumber-1] if imageName is None: - print "Error: Referenced image %s was not recognized as a valid image" % imageNumber + print("Error: Referenced image %s was not recognized as a valid image" % imageNumber) else: replacement = 'src="images/' + imageName + '"' tag = re.sub(image_index_pattern, replacement, tag, 1) @@ -128,8 +128,8 @@ class XHTMLK8Processor: posfid_index_pattern = re.compile(r'''['"]kindle:pos:fid:([0-9|A-V]+):off:([0-9|A-V]+).*?["']''') parts = [] - print "Building proper xhtml for each file" - for i in xrange(self.k8proc.getNumberOfParts()): + print("Building proper xhtml for each file") + for i in range(self.k8proc.getNumberOfParts()): part = self.k8proc.getPart(i) [partnum, dir, filename, beg, end, aidtext] = self.k8proc.getPartInfo(i) @@ -156,7 +156,7 @@ class XHTMLK8Processor: # we can safely remove all of the Kindlegen generated aid tags find_tag_with_aid_pattern = re.compile(r'''(<[^>]*\said\s*=[^>]*>)''', re.IGNORECASE) within_tag_aid_position_pattern = re.compile(r'''\said\s*=['"][^'"]*['"]''') - for i in xrange(len(parts)): + for i in range(len(parts)): part = parts[i] srcpieces = re.split(find_tag_with_aid_pattern, part) for j in range(len(srcpieces)): @@ -172,7 +172,7 @@ class XHTMLK8Processor: # we can safely remove all of the Kindlegen generated data-AmznPageBreak tags 
find_tag_with_AmznPageBreak_pattern = re.compile(r'''(<[^>]*\sdata-AmznPageBreak=[^>]*>)''', re.IGNORECASE) within_tag_AmznPageBreak_position_pattern = re.compile(r'''\sdata-AmznPageBreak=['"][^'"]*['"]''') - for i in xrange(len(parts)): + for i in range(len(parts)): part = parts[i] srcpieces = re.split(find_tag_with_AmznPageBreak_pattern, part) for j in range(len(srcpieces)): @@ -209,7 +209,7 @@ class XHTMLK8Processor: font_index_pattern = re.compile(r'''kindle:embed:([0-9|A-V]+)''', re.IGNORECASE) url_css_index_pattern = re.compile(r'''kindle:flow:([0-9|A-V]+)\?mime=text/css[^\)]*''', re.IGNORECASE) - for i in xrange(1, self.k8proc.getNumberOfFlows()): + for i in range(1, self.k8proc.getNumberOfFlows()): [type, format, dir, filename] = self.k8proc.getFlowInfo(i) flowpart = self.k8proc.getFlow(i) @@ -227,7 +227,7 @@ class XHTMLK8Processor: self.used[imageName] = 'used' tag = re.sub(img_index_pattern, replacement, tag, 1) else: - print "Error: Referenced image %s was not recognized as a valid image in %s" % (imageNumber, tag) + print("Error: Referenced image %s was not recognized as a valid image in %s" % (imageNumber, tag)) srcpieces[j] = tag flowpart = "".join(srcpieces) @@ -246,13 +246,13 @@ class XHTMLK8Processor: self.used[imageName] = 'used' tag = re.sub(url_img_index_pattern, replacement, tag, 1) else: - print "Error: Referenced image %s was not recognized as a valid image in %s" % (imageNumber, tag) + print("Error: Referenced image %s was not recognized as a valid image in %s" % (imageNumber, tag)) # process links to fonts for m in re.finditer(font_index_pattern, tag): fontNumber = fromBase32(m.group(1)) fontName = self.imgnames[fontNumber-1] if fontName is None: - print "Error: Referenced font %s was not recognized as a valid font in %s" % (fontNumber, tag) + print("Error: Referenced font %s was not recognized as a valid font in %s" % (fontNumber, tag)) else: replacement = '"../Fonts/' + fontName + '"' tag = re.sub(font_index_pattern, replacement, tag, 1) 
@@ -299,7 +299,7 @@ class XHTMLK8Processor: # kindle:flow:XXXX?mime=YYYY/ZZZ (used for style sheets, svg images, etc) tag_pattern = re.compile(r'''(<[^>]*>)''') flow_pattern = re.compile(r'''['"]kindle:flow:([0-9|A-V]+)\?mime=([^'"]+)['"]''', re.IGNORECASE) - for i in xrange(len(parts)): + for i in range(len(parts)): part = parts[i] [partnum, dir, filename, beg, end, aidtext] = self.k8proc.partinfo[i] @@ -327,7 +327,7 @@ class XHTMLK8Processor: # kindle:embed:XXXX?mime=image/gif (png, jpeg, etc) (used for images) img_pattern = re.compile(r'''(<[img\s|image\s][^>]*>)''', re.IGNORECASE) img_index_pattern = re.compile(r'''['"]kindle:embed:([0-9|A-V]+)[^'"]*['"]''') - for i in xrange(len(parts)): + for i in range(len(parts)): part = parts[i] [partnum, dir, filename, beg, end, aidtext] = self.k8proc.partinfo[i] @@ -345,7 +345,7 @@ class XHTMLK8Processor: self.used[imageName] = 'used' tag = re.sub(img_index_pattern, replacement, tag, 1) else: - print "Error: Referenced image %s was not recognized as a valid image in %s" % (imageNumber, tag) + print("Error: Referenced image %s was not recognized as a valid image in %s" % (imageNumber, tag)) srcpieces[j] = tag part = "".join(srcpieces) # store away modified version @@ -358,7 +358,7 @@ class XHTMLK8Processor: # in <li> remove value="XX" attributes since these are illegal tag_pattern = re.compile(r'''(<[^>]*>)''') li_value_pattern = re.compile(r'''\svalue\s*=\s*['"][^'"]*['"]''', re.IGNORECASE) - for i in xrange(len(parts)): + for i in range(len(parts)): part = parts[i] [partnum, dir, filename, beg, end, aidtext] = self.k8proc.partinfo[i] diff -Nrup a/data/scripts/mobi_lib/mobi_index.py b/data/scripts/mobi_lib/mobi_index.py --- a/data/scripts/mobi_lib/mobi_index.py 2019-08-10 20:20:51.000000000 +0200 +++ b/data/scripts/mobi_lib/mobi_index.py 2021-02-01 19:06:22.252958703 +0100 @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/python3 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai DEBUG = False @@ -26,21 +26,21 @@ class 
MobiIndex: for j in range(idxhdr['nctoc']): cdata = sect.loadSection(off + j) ctocdict = self.readCTOC(cdata) - for k in ctocdict.keys(): + for k in list(ctocdict.keys()): ctoc_text[k + rec_off] = ctocdict[k] rec_off += 0x10000 tagSectionStart = idxhdr['len'] controlByteCount, tagTable = readTagSection(tagSectionStart, data) if DEBUG: - print "IndexCount is", IndexCount - print "TagTable: %s" % tagTable + print("IndexCount is", IndexCount) + print("TagTable: %s" % tagTable) for i in range(idx + 1, idx + 1 + IndexCount): data = sect.loadSection(i) hdrinfo = self.parseINDXHeader(data) idxtPos = hdrinfo['start'] entryCount = hdrinfo['count'] if DEBUG: - print idxtPos, entryCount + print(idxtPos, entryCount) # loop through to build up the IDXT position starts idxPositions = [] for j in range(entryCount): @@ -57,8 +57,8 @@ class MobiIndex: tagMap = self.getTagMap(controlByteCount, tagTable, data, startPos+1+textLength, endPos) outtbl.append([text, tagMap]) if DEBUG: - print tagMap - print text + print(tagMap) + print(text) return outtbl, ctoc_text def getTagMap(self, controlByteCount, tagTable, entryData, startPos, endPos): @@ -118,19 +118,19 @@ class MobiIndex: totalConsumed += consumed values.append(data) if totalConsumed != valueBytes: - print "Error: Should consume %s bytes, but consumed %s" % (valueBytes, totalConsumed) + print("Error: Should consume %s bytes, but consumed %s" % (valueBytes, totalConsumed)) tagHashMap[tag] = values # Test that all bytes have been processed if endPos is given. if endPos is not None and dataStart != endPos: # The last entry might have some zero padding bytes, so complain only if non zero bytes are left. 
for char in entryData[dataStart:endPos]: if char != chr(0x00): - print "Warning: There are unprocessed index bytes left: %s" % toHex(entryData[dataStart:endPos]) + print("Warning: There are unprocessed index bytes left: %s" % toHex(entryData[dataStart:endPos])) if DEBUG: - print "controlByteCount: %s" % controlByteCount - print "tagTable: %s" % tagTable - print "data: %s" % toHex(entryData[startPos:endPos]) - print "tagHashMap: %s" % tagHashMap + print("controlByteCount: %s" % controlByteCount) + print("tagTable: %s" % tagTable) + print("data: %s" % toHex(entryData[startPos:endPos])) + print("tagHashMap: %s" % tagHashMap) break return tagHashMap @@ -154,7 +154,7 @@ class MobiIndex: def parseINDXHeader(self, data): "read INDX header" if not data[:4] == 'INDX': - print "Warning: index section is not INDX" + print("Warning: index section is not INDX") return False words = ( 'len', 'nul1', 'type', 'gen', 'start', 'count', 'code', @@ -166,10 +166,10 @@ class MobiIndex: for n in range(num): header[words[n]] = values[n] if DEBUG: - print "parsed INDX header:" + print("parsed INDX header:") for n in words: - print n, "%X" % header[n], - print + print(n, "%X" % header[n], end=' ') + print() return header def readCTOC(self, txtdata): @@ -187,7 +187,7 @@ class MobiIndex: name = txtdata[offset:offset+ilen] offset += ilen if DEBUG: - print "name length is ", ilen - print idx_offs, name + print("name length is ", ilen) + print(idx_offs, name) ctoc_data[idx_offs] = name return ctoc_data diff -Nrup a/data/scripts/mobi_lib/mobi_k8proc.py b/data/scripts/mobi_lib/mobi_k8proc.py --- a/data/scripts/mobi_lib/mobi_k8proc.py 2019-08-10 20:20:51.000000000 +0200 +++ b/data/scripts/mobi_lib/mobi_k8proc.py 2021-02-01 19:06:30.686958134 +0100 @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/python3 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai import sys, struct, re @@ -33,11 +33,11 @@ class K8Processor: sections = header[0x0c:] self.fdsttbl = struct.unpack_from('>%dL' % 
(num_sections*2), sections, 0)[::2] + (0xfffffff, ) else: - print "Error: K8 Mobi with Missing FDST info" + print("Error: K8 Mobi with Missing FDST info") if self.DEBUG: - print "\nFDST Section Map: %d entries" % len(self.fdsttbl) - for j in xrange(len(self.fdsttbl)): - print " %d - %0x" % (j, self.fdsttbl[j]) + print("\nFDST Section Map: %d entries" % len(self.fdsttbl)) + for j in range(len(self.fdsttbl)): + print(" %d - %0x" % (j, self.fdsttbl[j])) # read/process skeleton index info to create the skeleton table skeltbl = [] @@ -50,10 +50,10 @@ class K8Processor: fileptr += 1 self.skeltbl = skeltbl if self.DEBUG: - print "\nSkel Table: %d entries" % len(self.skeltbl) - print "table: filenum, skeleton name, div tbl record count, start position, length" - for j in xrange(len(self.skeltbl)): - print self.skeltbl[j] + print("\nSkel Table: %d entries" % len(self.skeltbl)) + print("table: filenum, skeleton name, div tbl record count, start position, length") + for j in range(len(self.skeltbl)): + print(self.skeltbl[j]) # read/process the div index to create to <div> (and <p>) table divtbl = [] @@ -66,10 +66,10 @@ class K8Processor: divtbl.append([int(text), ctocdata, tagMap[3][0], tagMap[4][0], tagMap[6][0], tagMap[6][1]]) self.divtbl = divtbl if self.DEBUG: - print "\nDiv (Fragment) Table: %d entries" % len(self.divtbl) - print "table: file position, link id text, file num, sequence number, start position, length" - for j in xrange(len(self.divtbl)): - print self.divtbl[j] + print("\nDiv (Fragment) Table: %d entries" % len(self.divtbl)) + print("table: file position, link id text, file num, sequence number, start position, length") + for j in range(len(self.divtbl)): + print(self.divtbl[j]) # read / process other index <guide> element of opf othtbl = [] @@ -81,29 +81,29 @@ class K8Processor: ref_title = ctoc_text[ctocoffset] ref_type = text fileno = None - if 3 in tagMap.keys(): + if 3 in list(tagMap.keys()): fileno = tagMap[3][0] - if 6 in tagMap.keys(): + if 6 in 
list(tagMap.keys()): fileno = tagMap[6][0] othtbl.append([ref_type, ref_title, fileno]) self.othtbl = othtbl if self.DEBUG: - print "\nOther (Guide) Table: %d entries" % len(self.othtbl) - print "table: ref_type, ref_title, divtbl entry number" - for j in xrange(len(self.othtbl)): - print self.othtbl[j] + print("\nOther (Guide) Table: %d entries" % len(self.othtbl)) + print("table: ref_type, ref_title, divtbl entry number") + for j in range(len(self.othtbl)): + print(self.othtbl[j]) def buildParts(self, rawML): # now split the rawML into its flow pieces self.flows = [] - for j in xrange(0, len(self.fdsttbl)-1): + for j in range(0, len(self.fdsttbl)-1): start = self.fdsttbl[j] end = self.fdsttbl[j+1] if end == 0xffffffff: end = len(rawML) if self.DEBUG: - print "splitting rawml starting at %d and ending at %d into flow piece %d" % (start, end, j) + print("splitting rawml starting at %d and ending at %d into flow piece %d" % (start, end, j)) self.flows.append(rawML[start:end]) # the first piece represents the xhtml text @@ -114,7 +114,7 @@ class K8Processor: # *without* destroying any file position information needed for later href processing # and create final list of file separation start: stop points and etc in partinfo if self.DEBUG: - print "\nRebuilding flow piece 0: the main body of the ebook" + print("\nRebuilding flow piece 0: the main body of the ebook") self.parts = [] self.partinfo = [] divptr = 0 @@ -125,8 +125,8 @@ class K8Processor: for i in range(divcnt): [insertpos, idtext, filenum, seqnum, startpos, length] = self.divtbl[divptr] if self.DEBUG: - print " moving div/frag %d starting at %d of length %d" % (divptr, startpos, length) - print " inside of skeleton number %d at postion %d" % (skelnum, insertpos) + print(" moving div/frag %d starting at %d of length %d" % (divptr, startpos, length)) + print(" inside of skeleton number %d at postion %d" % (skelnum, insertpos)) if i == 0: aidtext = idtext[12:-2] filename = 'part%04d.xhtml' % filenum @@ -160,7 
+160,7 @@ class K8Processor: self.flowinfo.append([None, None, None, None]) svg_tag_pattern = re.compile(r'''(<svg[^>]*>)''', re.IGNORECASE) image_tag_pattern = re.compile(r'''(<image[^>]*>)''', re.IGNORECASE) - for j in xrange(1,len(self.flows)): + for j in range(1,len(self.flows)): flowpart = self.flows[j] nstr = '%04d' % j m = re.search(svg_tag_pattern, flowpart) @@ -198,14 +198,14 @@ class K8Processor: self.flowinfo.append([type, format, dir, fname]) if self.DEBUG: - print "\nFlow Map: %d entries" % len(self.flowinfo) + print("\nFlow Map: %d entries" % len(self.flowinfo)) for fi in self.flowinfo: - print fi - print "\n" + print(fi) + print("\n") - print "\nXHTML File Part Position Information: %d entries" % len(self.partinfo) + print("\nXHTML File Part Position Information: %d entries" % len(self.partinfo)) for pi in self.partinfo: - print pi + print(pi) if False: # self.DEBUG: # dump all of the locations of the aid tags used in TEXT @@ -214,12 +214,12 @@ class K8Processor: # [^>]* means match any amount of chars except for '>' char # [^'"] match any amount of chars except for the quote character # \s* means match any amount of whitespace - print "\npositions of all aid= pieces" + print("\npositions of all aid= pieces") id_pattern = re.compile(r'''<[^>]*\said\s*=\s*['"]([^'"]*)['"][^>]*>''',re.IGNORECASE) for m in re.finditer(id_pattern, rawML): - print "%0x %s %0x" % (m.start(), m.group(1), fromBase32(m.group(1))) + print("%0x %s %0x" % (m.start(), m.group(1), fromBase32(m.group(1)))) [filename, partnum, start, end] = self.getFileInfo(m.start()) - print " in %d %0x %0x" % (partnum, start, end) + print(" in %d %0x %0x" % (partnum, start, end)) return @@ -300,7 +300,7 @@ class K8Processor: n = len(idtbl) if n == 0: if self.DEBUG: - print "Found no id in the textblock, link must be to top of file" + print("Found no id in the textblock, link must be to top of file") return '' # if npos is before first id= inside a tag, return the first if npos < idtbl[0][0] : @@ 
-310,12 +310,12 @@ class K8Processor: return idtbl[n-1][1] # otherwise find last id before npos tgt = 0 - for r in xrange(n): + for r in range(n): if npos < idtbl[r][0]: tgt = r-1 break if self.DEBUG: - print pos, npos, idtbl[tgt] + print(pos, npos, idtbl[tgt]) return idtbl[tgt][1] @@ -328,7 +328,7 @@ class K8Processor: # do we need to do deep copying def setFlows(self, flows): assert(len(flows) == len(self.flows)) - for i in xrange(len(flows)): + for i in range(len(flows)): self.flows[i] = flows[i] @@ -351,5 +351,5 @@ class K8Processor: linktgt += '#' + idtext guidetext += '<reference type="%s" title="%s" href="%s/%s" />\n' % (ref_type, ref_title, dir, linktgt) # opf is encoded utf-8 so must convert any titles properly - guidetext = unicode(guidetext, self.mh.codec).encode("utf-8") + guidetext = str(guidetext, self.mh.codec).encode("utf-8") return guidetext diff -Nrup a/data/scripts/mobi_lib/mobi_ncx.py b/data/scripts/mobi_lib/mobi_ncx.py --- a/data/scripts/mobi_lib/mobi_ncx.py 2019-08-10 20:20:51.000000000 +0200 +++ b/data/scripts/mobi_lib/mobi_ncx.py 2021-02-01 19:06:40.439957476 +0100 @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/python3 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai DEBUG_NCX = False @@ -34,8 +34,8 @@ class ncxExtract: if self.ncxidx != 0xffffffff: outtbl, ctoc_text = self.mi.getIndexData(self.ncxidx) if DEBUG_NCX: - print ctoc_text - print outtbl + print(ctoc_text) + print(outtbl) num = 0 for [text, tagMap] in outtbl: tmp = { @@ -52,7 +52,7 @@ class ncxExtract: 'childn' : -1, 'num' : num } - for tag in tag_fieldname_map.keys(): + for tag in list(tag_fieldname_map.keys()): [fieldname, i] = tag_fieldname_map[tag] if tag in tagMap: fieldvalue = tagMap[tag][i] @@ -68,16 +68,16 @@ class ncxExtract: tmp['kind'] = ctoc_text.get(fieldvalue, 'Unknown Kind') indx_data.append(tmp) if DEBUG_NCX: - print "record number: ", num - print "name: ", tmp['name'], - print "position", tmp['pos']," length: ", tmp['len'] - print "text: ", tmp['text'] - 
print "kind: ", tmp['kind'] - print "heading level: ", tmp['hlvl'] - print "parent:", tmp['parent'] - print "first child: ",tmp['child1']," last child: ", tmp['childn'] - print "pos_fid is ", tmp['pos_fid'] - print "\n\n" + print("record number: ", num) + print("name: ", tmp['name'], end=' ') + print("position", tmp['pos']," length: ", tmp['len']) + print("text: ", tmp['text']) + print("kind: ", tmp['kind']) + print("heading level: ", tmp['hlvl']) + print("parent:", tmp['parent']) + print("first child: ",tmp['child1']," last child: ", tmp['childn']) + print("pos_fid is ", tmp['pos_fid']) + print("\n\n") num += 1 num += 1 self.indx_data = indx_data @@ -118,10 +118,10 @@ class ncxExtract: #recursive part def recursINDX(max_lvl=0, num=0, lvl=0, start=-1, end=-1): if start>len(indx_data) or end>len(indx_data): - print "Warning: missing INDX child entries", start, end, len(indx_data) + print("Warning: missing INDX child entries", start, end, len(indx_data)) return '' if DEBUG_NCX: - print "recursINDX lvl %d from %d to %d" % (lvl, start, end) + print("recursINDX lvl %d from %d to %d" % (lvl, start, end)) xml = '' if start <= 0: start = 0 @@ -155,13 +155,13 @@ class ncxExtract: header = ncx_header % (ident, max_lvl + 1, title) ncx = header + body + ncx_footer if not len(indx_data) == num: - print "Warning: different number of entries in NCX", len(indx_data), num + print("Warning: different number of entries in NCX", len(indx_data), num) return ncx def writeNCX(self, metadata): # build the xml self.isNCX = True - print "Write ncx" + print("Write ncx") htmlname = os.path.basename(self.files.outbase) htmlname += '.html' xml = self.buildNCX(htmlname, metadata['Title'][0], metadata['UniqueID'][0]) @@ -202,10 +202,10 @@ class ncxExtract: #recursive part def recursINDX(max_lvl=0, num=0, lvl=0, start=-1, end=-1): if start>len(indx_data) or end>len(indx_data): - print "Warning: missing INDX child entries", start, end, len(indx_data) + print("Warning: missing INDX child entries", 
start, end, len(indx_data)) return '' if DEBUG_NCX: - print "recursINDX lvl %d from %d to %d" % (lvl, start, end) + print("recursINDX lvl %d from %d to %d" % (lvl, start, end)) xml = '' if start <= 0: start = 0 @@ -244,13 +244,13 @@ class ncxExtract: header = ncx_header % (ident, max_lvl + 1, title) ncx = header + body + ncx_footer if not len(indx_data) == num: - print "Warning: different number of entries in NCX", len(indx_data), num + print("Warning: different number of entries in NCX", len(indx_data), num) return ncx def writeK8NCX(self, ncx_data, metadata): # build the xml self.isNCX = True - print "Write K8 ncx" + print("Write K8 ncx") xml = self.buildK8NCX(ncx_data, metadata['Title'][0], metadata['UniqueID'][0]) bname = 'toc.ncx' ncxname = os.path.join(self.files.k8oebps,bname) diff -Nrup a/data/scripts/mobi_lib/mobi_opf.py b/data/scripts/mobi_lib/mobi_opf.py --- a/data/scripts/mobi_lib/mobi_opf.py 2019-08-10 20:20:51.000000000 +0200 +++ b/data/scripts/mobi_lib/mobi_opf.py 2021-02-01 19:06:50.084956825 +0100 @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/python3 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai import sys, os, re, uuid @@ -21,7 +21,7 @@ class OPFProcessor: def writeOPF(self, has_obfuscated_fonts=False): # write out the metadata as an OEB 1.0 OPF file - print "Write opf" + print("Write opf") metadata = self.metadata META_TAGS = ['Drm Server Id', 'Drm Commerce Id', 'Drm Ebookbase Book Id', 'ASIN', 'ThumbOffset', 'Fake Cover', @@ -37,7 +37,7 @@ class OPFProcessor: @param key: The key of the metadata value to handle. @param tag: The opf tag the the metadata value. ''' - if key in metadata.keys(): + if key in list(metadata.keys()): for value in metadata[key]: # Strip all tag attributes for the closing tag. 
closingTag = tag.split(" ")[0] @@ -45,7 +45,7 @@ class OPFProcessor: del metadata[key] def handleMetaPairs(data, metadata, key, name): - if key in metadata.keys(): + if key in list(metadata.keys()): for value in metadata[key]: data.append('<meta name="%s" content="%s" />\n' % (name, value)) del metadata[key] @@ -55,12 +55,12 @@ class OPFProcessor: data.append('<package version="2.0" xmlns="http://www.idpf.org/2007/opf" unique-identifier="uid">\n') data.append('<metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">\n') # Handle standard metadata - if 'Title' in metadata.keys(): + if 'Title' in list(metadata.keys()): handleTag(data, metadata, 'Title', 'dc:title') else: data.append('<dc:title>Untitled</dc:title>\n') handleTag(data, metadata, 'Language', 'dc:language') - if 'UniqueID' in metadata.keys(): + if 'UniqueID' in list(metadata.keys()): handleTag(data, metadata, 'UniqueID', 'dc:identifier id="uid"') else: # No unique ID in original, give it a generic one. 
@@ -78,8 +78,8 @@ class OPFProcessor: handleTag(data, metadata, 'Source', 'dc:source') handleTag(data, metadata, 'Type', 'dc:type') handleTag(data, metadata, 'ISBN', 'dc:identifier opf:scheme="ISBN"') - if 'Subject' in metadata.keys(): - if 'SubjectCode' in metadata.keys(): + if 'Subject' in list(metadata.keys()): + if 'SubjectCode' in list(metadata.keys()): codeList = metadata['SubjectCode'] del metadata['SubjectCode'] else: @@ -96,11 +96,11 @@ class OPFProcessor: handleTag(data, metadata, 'Rights', 'dc:rights') handleTag(data, metadata, 'DictInLanguage', 'DictionaryInLanguage') handleTag(data, metadata, 'DictOutLanguage', 'DictionaryOutLanguage') - if 'CoverOffset' in metadata.keys(): + if 'CoverOffset' in list(metadata.keys()): imageNumber = int(metadata['CoverOffset'][0]) self.covername = self.imgnames[imageNumber] if self.covername is None: - print "Error: Cover image %s was not recognized as a valid image" % imageNumber + print("Error: Cover image %s was not recognized as a valid image" % imageNumber) else: if self.isK8: data.append('<meta name="cover" content="cover_img" />\n') @@ -122,22 +122,22 @@ class OPFProcessor: handleTag(data, metadata, 'Imprint', 'Imprint') handleTag(data, metadata, 'Adult', 'Adult') handleTag(data, metadata, 'DictShortName', 'DictionaryVeryShortName') - if 'Price' in metadata.keys() and 'Currency' in metadata.keys(): + if 'Price' in list(metadata.keys()) and 'Currency' in list(metadata.keys()): priceList = metadata['Price'] currencyList = metadata['Currency'] if len(priceList) != len(currencyList): - print "Error: found %s price entries, but %s currency entries." + print("Error: found %s price entries, but %s currency entries." % (len(priceList), len(currencyList))) else: for i in range(len(priceList)): data.append('<SRP Currency="'+currencyList[i]+'">'+priceList[i]+'</SRP>\n') del metadata['Price'] del metadata['Currency'] data.append("<!-- The following meta tags are just for information and will be ignored by mobigen/kindlegen. 
-->\n") - if 'ThumbOffset' in metadata.keys(): + if 'ThumbOffset' in list(metadata.keys()): imageNumber = int(metadata['ThumbOffset'][0]) imageName = self.imgnames[imageNumber] if imageName is None: - print "Error: Cover Thumbnail image %s was not recognized as a valid image" % imageNumber + print("Error: Cover Thumbnail image %s was not recognized as a valid image" % imageNumber) else: if self.isK8: data.append('<meta name="Cover ThumbNail Image" content="'+ 'Images/'+imageName+'" />\n') @@ -146,11 +146,11 @@ class OPFProcessor: self.used[imageName] = 'used' del metadata['ThumbOffset'] for metaName in META_TAGS: - if metaName in metadata.keys(): + if metaName in list(metadata.keys()): for value in metadata[metaName]: data.append('<meta name="'+metaName+'" content="'+value+'" />\n') del metadata[metaName] - for key in metadata.keys(): + for key in list(metadata.keys()): for value in metadata[key]: if key == 'StartOffset' and int(value) == 0xffffffff: value = '0' @@ -228,7 +228,7 @@ class OPFProcessor: metaguidetext = '' if not self.isK8: # get guide items from metadata - if 'StartOffset' in metadata.keys(): + if 'StartOffset' in list(metadata.keys()): so = metadata.get('StartOffset')[0] if int(so) == 0xffffffff: so = '0' diff -Nrup a/data/scripts/mobi_lib/mobi_split.py b/data/scripts/mobi_lib/mobi_split.py --- a/data/scripts/mobi_lib/mobi_split.py 2019-08-10 20:20:51.000000000 +0200 +++ b/data/scripts/mobi_lib/mobi_split.py 2021-02-01 19:07:01.023956086 +0100 @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/python3 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai import sys diff -Nrup a/data/scripts/mobi_lib/mobi_uncompress.py b/data/scripts/mobi_lib/mobi_uncompress.py --- a/data/scripts/mobi_lib/mobi_uncompress.py 2019-08-10 20:20:51.000000000 +0200 +++ b/data/scripts/mobi_lib/mobi_uncompress.py 2021-02-01 19:07:09.957955484 +0100 @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/python3 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai import sys @@ -34,7 
+34,7 @@ class PalmdocReader: if (m > n): o += o[-m:n-m] else: - for _ in xrange(n): + for _ in range(n): o += o[-m] return o @@ -53,7 +53,7 @@ class HuffcdicReader: assert term maxcode = ((maxcode + 1) << (32 - codelen)) - 1 return (codelen, term, maxcode) - self.dict1 = map(dict1_unpack, struct.unpack_from('>256L', huff, off1)) + self.dict1 = list(map(dict1_unpack, struct.unpack_from('>256L', huff, off1))) dict2 = struct.unpack_from('>64L', huff, off2) self.mincode, self.maxcode = (), () @@ -74,7 +74,7 @@ class HuffcdicReader: blen, = h(cdic, 16+off) slice = cdic[18+off:18+off+(blen&0x7fff)] return (slice, blen&0x8000) - self.dictionary += map(getslice, struct.unpack_from('>%dH' % n, cdic, 16)) + self.dictionary += list(map(getslice, struct.unpack_from('>%dH' % n, cdic, 16))) def unpack(self, data): q = HuffcdicReader.q diff -Nrup a/data/scripts/mobi_lib/mobi_unpack.py b/data/scripts/mobi_lib/mobi_unpack.py --- a/data/scripts/mobi_lib/mobi_unpack.py 2019-08-10 20:20:51.000000000 +0200 +++ b/data/scripts/mobi_lib/mobi_unpack.py 2021-02-01 19:07:18.081954935 +0100 @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/python3 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai # Changelog @@ -256,7 +256,7 @@ class MobiHeader: self.header = self.sect.loadSection(self.start) self.records, = struct.unpack_from('>H', self.header, 0x8) self.length, self.type, self.codepage, self.unique_id, self.version = struct.unpack('>LLLLL', self.header[20:40]) - print "Mobi Version: ", self.version + print("Mobi Version: ", self.version) # codec self.codec = 'windows-1252' @@ -264,32 +264,32 @@ class MobiHeader: 1252 : 'windows-1252', 65001: 'utf-8', } - if self.codepage in codec_map.keys(): + if self.codepage in list(codec_map.keys()): self.codec = codec_map[self.codepage] - print "Codec: ", self.codec + print("Codec: ", self.codec) # title toff, tlen = struct.unpack('>II', self.header[0x54:0x5c]) tend = toff + tlen self.title=self.header[toff:tend] - print "Title: ", self.title + 
print("Title: ", self.title) # set up for decompression/unpacking compression, = struct.unpack_from('>H', self.header, 0x0) if compression == 0x4448: - print "Huffdic compression" + print("Huffdic compression") reader = HuffcdicReader() huffoff, huffnum = struct.unpack_from('>LL', self.header, 0x70) huffoff = huffoff + self.start reader.loadHuff(self.sect.loadSection(huffoff)) - for i in xrange(1, huffnum): + for i in range(1, huffnum): reader.loadCdic(self.sect.loadSection(huffoff+i)) self.unpack = reader.unpack elif compression == 2: - print "Palmdoc compression" + print("Palmdoc compression") self.unpack = PalmdocReader().unpack elif compression == 1: - print "No compression" + print("No compression") self.unpack = UncompressedReader().unpack else: raise unpackException('invalid compression type: 0x%4x' % compression) @@ -376,14 +376,14 @@ class MobiHeader: self.fdst += self.start if DEBUG: - print "firstaddl %0x" % self.firstaddl - print "ncxidx %0x" % self.ncxidx - print "exth flags %0x" % exth_flag + print("firstaddl %0x" % self.firstaddl) + print("ncxidx %0x" % self.ncxidx) + print("exth flags %0x" % exth_flag) if self.version == 8 or self.start != 0: - print "skelidx %0x" % self.skelidx - print "dividx %0x" % self.dividx - print "othidx %0x" % self.othidx - print "fdst %0x" % self.fdst + print("skelidx %0x" % self.skelidx) + print("dividx %0x" % self.dividx) + print("othidx %0x" % self.othidx) + print("fdst %0x" % self.fdst) # NOTE: See DumpMobiHeader.py for a complete set of header fields @@ -444,7 +444,7 @@ class MobiHeader: num = (num << 7) | (ord(v) & 0x7f) return num def trimTrailingDataEntries(data): - for _ in xrange(trailers): + for _ in range(trailers): num = getSizeOfTrailingDataEntry(data) data = data[:-num] if multibyte: @@ -464,10 +464,10 @@ class MobiHeader: trailers += 1 flags = flags >> 1 # get raw mobi markup languge - print "Unpack raw markup language" + print("Unpack raw markup language") dataList = [] # offset = 0 - for i in xrange(1, 
self.records+1): + for i in range(1, self.records+1): data = trimTrailingDataEntries(self.sect.loadSection(self.start + i)) dataList.append(self.unpack(data)) return "".join(dataList) @@ -542,17 +542,17 @@ class MobiHeader: else: metadata[name].append(value) if DEBUG: - print "multiple values: metadata[%s]=%s" % (name, metadata[name]) + print("multiple values: metadata[%s]=%s" % (name, metadata[name])) _length, num_items = struct.unpack('>LL', extheader[4:12]) extheader = extheader[12:] pos = 0 for _ in range(num_items): id, size = struct.unpack('>LL', extheader[pos:pos+8]) content = extheader[pos + 8: pos + size] - if id in id_map_strings.keys(): + if id in list(id_map_strings.keys()): name = id_map_strings[id] - addValue(name, unicode(content, codec).encode("utf-8")) - elif id in id_map_values.keys(): + addValue(name, str(content, codec).encode("utf-8")) + elif id in list(id_map_values.keys()): name = id_map_values[id] if size == 9: value, = struct.unpack('B',content) @@ -564,12 +564,12 @@ class MobiHeader: value, = struct.unpack('>L',content) addValue(name, str(value)) else: - print "Error: Value for %s has unexpected size of %s" % (name, size) - elif id in id_map_hexstrings.keys(): + print("Error: Value for %s has unexpected size of %s" % (name, size)) + elif id in list(id_map_hexstrings.keys()): name = id_map_hexstrings[id] addValue(name, content.encode('hex')) else: - print "Warning: Unknown metadata with id %s found" % id + print("Warning: Unknown metadata with id %s found" % id) name = str(id) + ' (hex)' addValue(name, content.encode('hex')) pos += size @@ -583,9 +583,9 @@ class MobiHeader: # for each table, read in count of sections, assume first section is a PDF # and output other sections as binary files paths = [] - for i in xrange(numTables): + for i in range(numTables): sectionCount, = struct.unpack_from('>L', rawML, 0x08 + 4*i) - for j in xrange(sectionCount): + for j in range(sectionCount): sectionOffset, sectionLength, = struct.unpack_from('>LL', 
rawML, tableIndexOffset) tableIndexOffset += 8 if j == 0: @@ -600,11 +600,11 @@ def process_all_mobi_headers(files, sect for mh in mhlst: if mh.isK8(): - print "\n\nProcessing K8 format Ebook ..." + print("\n\nProcessing K8 format Ebook ...") elif mh.isPrintReplica(): - print "\nProcessing PrintReplica (.azw4) format Ebook ..." + print("\nProcessing PrintReplica (.azw4) format Ebook ...") else: - print "\nProcessing Mobi format Ebook ..." + print("\nProcessing Mobi format Ebook ...") if DEBUG: # write out raw mobi header data @@ -620,12 +620,12 @@ def process_all_mobi_headers(files, sect # build up the metadata metadata = mh.getMetaData() metadata['Language'] = mh.Language() - metadata['Title'] = [unicode(mh.title, mh.codec).encode("utf-8")] + metadata['Title'] = [str(mh.title, mh.codec).encode("utf-8")] metadata['Codec'] = [mh.codec] metadata['UniqueID'] = [str(mh.unique_id)] if DEBUG: - print "MetaData from EXTH" - print metadata + print("MetaData from EXTH") + print(metadata) # save the raw markup language rawML = mh.getRawML() @@ -643,25 +643,25 @@ def process_all_mobi_headers(files, sect # process additional sections that represent images, resources, fonts, and etc # build up a list of image names to use to postprocess the rawml - print "Unpacking images, resources, fonts, etc" + print("Unpacking images, resources, fonts, etc") firstaddl = mh.getfirstAddl() if DEBUG: - print "firstaddl is ", firstaddl - print "num_sections is ", sect.num_sections - print "K8Boundary is ", K8Boundary + print("firstaddl is ", firstaddl) + print("num_sections is ", sect.num_sections) + print("K8Boundary is ", K8Boundary) beg = firstaddl end = sect.num_sections if firstaddl < K8Boundary: end = K8Boundary obfuscate_data = [] - for i in xrange(beg, end): + for i in range(beg, end): if DEBUG: - print "Section is ", i + print("Section is ", i) data = sect.loadSection(i) type = data[0:4] if type in ["FLIS", "FCIS", "FDST", "DATP"]: if DEBUG: - print 'First 4 bytes: %s' % 
toHex(data[0:4]) + print('First 4 bytes: %s' % toHex(data[0:4])) fname = "%05d" % (1+i-beg) fname = type + fname if mh.isK8(): @@ -669,13 +669,13 @@ def process_all_mobi_headers(files, sect fname += '.dat' outname= os.path.join(files.outdir, fname) file(outname, 'wb').write(data) - print "Skipping ", type, " section" + print("Skipping ", type, " section") imgnames.append(None) continue elif type == "SRCS": # The mobi file was created by kindlegen and contains a zip archive with all source files. # Extract the archive and save it. - print " Info: File contains kindlegen source archive, extracting as %s" % KINDLEGENSRC_FILENAME + print(" Info: File contains kindlegen source archive, extracting as %s" % KINDLEGENSRC_FILENAME) srcname = os.path.join(files.outdir, KINDLEGENSRC_FILENAME) file(srcname, 'wb').write(data[16:]) imgnames.append(None) @@ -699,7 +699,7 @@ def process_all_mobi_headers(files, sect # obfuscated so need to de-obfuscate the first 1040 bytes key = bytearray(data[xor_start: xor_start+ xor_len]) buf = bytearray(font_data) - for n in xrange(extent): + for n in range(extent): buf[n] ^= key[n%xor_len] font_data = bytes(buf) if fflags & 0x0001: @@ -709,29 +709,29 @@ def process_all_mobi_headers(files, sect adler32, = struct.unpack_from('>I', font_data, len(font_data) - 4) font_data = zlib.decompress(font_data[2:-4], -wbits, usize) if len(font_data) != usize: - print 'Font Decompression Error: Uncompressed font size mismatch' + print('Font Decompression Error: Uncompressed font size mismatch') if False: # For some reason these almost never match, probably Amazon has a # buggy Adler32 implementation sig = (zlib.adler32(font_data) & 0xffffffff) if sig != adler32: - print 'Font Decompression Error' - print 'Adler checksum did not match. Stored: %d Calculated: %d' % (adler32, sig) + print('Font Decompression Error') + print('Adler checksum did not match. 
Stored: %d Calculated: %d' % (adler32, sig)) else: - print "Error Decoding Font", str(err) + print("Error Decoding Font", str(err)) hdr = font_data[0:4] if hdr == '\0\1\0\0' or hdr == 'true' or hdr == 'ttcf': ext = '.ttf' elif hdr == 'OTTO': ext = '.otf' else: - print "Warning: unknown font header %s" % hdr.encode('hex') + print("Warning: unknown font header %s" % hdr.hex()) ext = '.dat' fontname = "font%05d" % (1+i-beg) fontname += ext if (ext == '.ttf' or ext == '.otf') and (fflags & 0x0002): obfuscate_data.append(fontname) - print " extracting font: ", fontname + print(" extracting font: ", fontname) outfnt = os.path.join(files.imgdir, fontname) file(outfnt, 'wb').write(font_data) imgnames.append(fontname) @@ -746,7 +746,7 @@ def process_all_mobi_headers(files, sect if DEBUG: data = data[4:] rescname = "resc%05d.dat" % (1+i-beg) - print " extracting resource: ", rescname + print(" extracting resource: ", rescname) outrsc = os.path.join(files.imgdir, rescname) file(outrsc, 'wb').write(data) imgnames.append(None) @@ -754,7 +754,7 @@ def process_all_mobi_headers(files, sect if data == EOF_RECORD: if DEBUG: - print "Skip section %i as it contains the EOF record." + print("Skip section %i as it contains the EOF record." 
% i) imgnames.append(None) continue @@ -762,16 +762,16 @@ def process_all_mobi_headers(files, sect # Get the proper file extension imgtype = imghdr.what(None, data) if imgtype is None: - print "Warning: Section %s contains no image or an unknown image format" % i + print("Warning: Section %s contains no image or an unknown image format" % i) imgnames.append(None) if DEBUG: - print 'First 4 bytes: %s' % toHex(data[0:4]) + print('First 4 bytes: %s' % toHex(data[0:4])) fname = "unknown%05d.dat" % (1+i-beg) outname= os.path.join(files.outdir, fname) file(outname, 'wb').write(data) else: imgname = "image%05d.%s" % (1+i-beg, imgtype) - print " extracting image: ", imgname + print(" extracting image: ", imgname) outimg = os.path.join(files.imgdir, imgname) file(outimg, 'wb').write(data) imgnames.append(imgname) @@ -781,11 +781,11 @@ def process_all_mobi_headers(files, sect # Process print replica book. if mh.isPrintReplica() and not k8only: filenames = [] - print "Print Replica ebook detected" + print("Print Replica ebook detected") try: mh.processPrintReplica(files) - except Exception, e: - print 'Error processing Print Replica: ' + str(e) + except Exception as e: + print('Error processing Print Replica: ' + str(e)) filenames.append(['', files.getInputFileBasename() + '.pdf']) usedmap = {} for name in imgnames: @@ -805,7 +805,7 @@ def process_all_mobi_headers(files, sect # collect information for the guide first guidetext = k8proc.getGuideText() # add in any guide info from metadata, such as StartOffset - if 'StartOffset' in metadata.keys(): + if 'StartOffset' in list(metadata.keys()): starts = metadata['StartOffset'] last_start = starts.pop() if int(last_start) == 0xffffffff: @@ -904,7 +904,7 @@ def process_all_mobi_headers(files, sect replacetext = r'''href="'''+filenames[0][1]+r'''#filepos\1"''' guidetext = re.sub(r'''filepos=['"]{0,1}0*(\d+)['"]{0,1}''', replacetext, guidematch.group(1)) guidetext += '\n' - guidetext = unicode(guidetext, mh.codec).encode("utf-8") + 
guidetext = str(guidetext, mh.codec).encode("utf-8") opf = OPFProcessor(files, metadata, filenames, imgnames, ncx.isNCX, mh, usedmap, guidetext) opf.writeOPF() return @@ -915,7 +915,7 @@ def unpackBook(infile, outdir): # process the PalmDoc database header and verify it is a mobi sect = Sectionizer(infile) - print "Palm DB type: ", sect.ident + print("Palm DB type: ", sect.ident) if sect.ident != 'BOOKMOBI' and sect.ident != 'TEXtREAd': raise unpackException('invalid file format') @@ -940,12 +940,12 @@ def unpackBook(infile, outdir): # the last section uses an appended entry of 0xfffffff as its starting point # attempting to process it will cause problems if not hasK8: # if this is a mobi8-only file we don't need to do this - for i in xrange(len(sect.sections)-1): + for i in range(len(sect.sections)-1): before, after = sect.sections[i:i+2] if (after - before) == 8: data = sect.loadSection(i) if data == K8_BOUNDARY: - print "Mobi Ebook uses the new K8 file format" + print("Mobi Ebook uses the new K8 file format") mh = MobiHeader(sect,i+1) hasK8 = hasK8 or mh.isK8() mhlst.append(mh) @@ -984,7 +984,7 @@ class Mobi8Reader: # the last section uses an appended entry of 0xfffffff as its starting point # attempting to process it will cause problems if not self.hasK8: # if this is a mobi8-only file we don't need to do this - for i in xrange(len(self.sect.sections)-1): + for i in range(len(self.sect.sections)-1): before, after = self.sect.sections[i:i+2] if (after - before) == 8: data = self.sect.loadSection(i) @@ -1010,32 +1010,32 @@ class Mobi8Reader: def usage(progname): - print "" - print "Description:" - print " Unpacks an unencrypted Kindle/MobiPocket ebook to html and images" - print " or an unencrypted Kindle/Print Replica ebook to PDF and images" - print " into the specified output folder." 
- print "Usage:" - print " %s -r -s -d -h infile [outdir]" % progname - print "Options:" - print " -r write raw data to the output folder" - print " -s split combination mobis into mobi7 and mobi8 ebooks" - print " -d enable verbose debugging" - print " -h print this help message" + print("") + print("Description:") + print(" Unpacks an unencrypted Kindle/MobiPocket ebook to html and images") + print(" or an unencrypted Kindle/Print Replica ebook to PDF and images") + print(" into the specified output folder.") + print("Usage:") + print(" %s -r -s -d -h infile [outdir]" % progname) + print("Options:") + print(" -r write raw data to the output folder") + print(" -s split combination mobis into mobi7 and mobi8 ebooks") + print(" -d enable verbose debugging") + print(" -h print this help message") def main(argv=sys.argv): global DEBUG global WRITE_RAW_DATA global SPLIT_COMBO_MOBIS - print "MobiUnpack 0.47" - print " Copyright (c) 2009 Charles M. Hannum <root@ihack.net>" - print " With Additions by P. Durrant, K. Hendricks, S. Siebert, fandrieu, DiapDealer, nickredding." + print("MobiUnpack 0.47") + print(" Copyright (c) 2009 Charles M. Hannum <root@ihack.net>") + print(" With Additions by P. Durrant, K. Hendricks, S. Siebert, fandrieu, DiapDealer, nickredding.") progname = os.path.basename(argv[0]) try: opts, args = getopt.getopt(sys.argv[1:], "hdrs") - except getopt.GetoptError, err: - print str(err) + except getopt.GetoptError as err: + print(str(err)) usage(progname) sys.exit(2) @@ -1062,16 +1062,16 @@ def main(argv=sys.argv): infileext = os.path.splitext(infile)[1].upper() if infileext not in ['.MOBI', '.PRC', '.AZW', '.AZW4']: - print "Error: first parameter must be a Kindle/Mobipocket ebook or a Kindle/Print Replica ebook." + print("Error: first parameter must be a Kindle/Mobipocket ebook or a Kindle/Print Replica ebook.") return 1 try: - print 'Unpacking Book...' 
+ print('Unpacking Book...') unpackBook(infile, outdir) - print 'Completed' + print('Completed') - except ValueError, e: - print "Error: %s" % e + except ValueError as e: + print("Error: %s" % e) return 1 return 0 diff -Nrup a/data/scripts/mobi_lib/mobi_utils.py b/data/scripts/mobi_lib/mobi_utils.py --- a/data/scripts/mobi_lib/mobi_utils.py 2019-08-10 20:20:51.000000000 +0200 +++ b/data/scripts/mobi_lib/mobi_utils.py 2021-02-01 19:07:26.483954368 +0100 @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/python3 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai import sys @@ -130,7 +130,7 @@ def toBin(value, bits = 8): @param bits: The number of bits for the binary string (defaults to 8). @return: String with the binary representation. ''' - return "".join(map(lambda y:str((value>>y)&1), range(bits-1, -1, -1))) + return "".join([str((value>>y)&1) for y in range(bits-1, -1, -1)]) def toBase32(value, npad=4): digits = '0123456789ABCDEFGHIJKLMNOPQRSTUV' @@ -154,7 +154,7 @@ def fromBase32(str_num): j = 0 n = len(str_num) scale = 0 - for i in xrange(n): + for i in range(n): c = str_num[n-i-1:n-i] if c in '0123456789': v = (ord(c) - ord('0')) @@ -219,6 +219,6 @@ def mangle_fonts(encryption_key, data): """ crypt = data[:1024] key = cycle(iter(map(ord, encryption_key))) - encrypt = ''.join([chr(ord(x)^key.next()) for x in crypt]) + encrypt = ''.join([chr(ord(x)^next(key)) for x in crypt]) return encrypt + data[1024:]