From 5e10e3663bf0239538261b0d01cf76de431e101c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 1 Apr 2019 16:49:20 +0530 Subject: [PATCH] py3: Port another instance of cStringIO --- src/calibre/ebooks/mobi/reader/mobi6.py | 42 ++++++++++++------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/src/calibre/ebooks/mobi/reader/mobi6.py b/src/calibre/ebooks/mobi/reader/mobi6.py index c8d247a638..fea1cc78ed 100644 --- a/src/calibre/ebooks/mobi/reader/mobi6.py +++ b/src/calibre/ebooks/mobi/reader/mobi6.py @@ -6,7 +6,7 @@ __license__ = 'GPL v3' __copyright__ = '2012, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import shutil, os, re, struct, textwrap, cStringIO +import shutil, os, re, struct, textwrap, io from lxml import html, etree @@ -294,7 +294,7 @@ class MobiReader(object): parse_cache[htmlfile] = root self.htmlfile = htmlfile - ncx = cStringIO.StringIO() + ncx = io.BytesIO() opf, ncx_manifest_entry = self.create_opf(htmlfile, guide, root) self.created_opf_path = os.path.splitext(htmlfile)[0] + '.opf' opf.render(lopen(self.created_opf_path, 'wb'), ncx, @@ -311,7 +311,7 @@ class MobiReader(object): if self.book_header.exth is not None or self.embedded_mi is not None: self.log.debug('Creating OPF...') - ncx = cStringIO.StringIO() + ncx = io.BytesIO() opf, ncx_manifest_entry = self.create_opf(htmlfile, guide, root) opf.render(open(os.path.splitext(htmlfile)[0] + '.opf', 'wb'), ncx, ncx_manifest_entry) @@ -320,9 +320,9 @@ class MobiReader(object): write_as_utf8(os.path.splitext(htmlfile)[0] + '.ncx', ncx) def read_embedded_metadata(self, root, elem, guide): - raw = '\n' + \ - html.tostring(elem, encoding='utf-8') + '' - stream = cStringIO.StringIO(raw) + raw = b'\n' + \ + html.tostring(elem, encoding='utf-8') + b'' + stream = io.BytesIO(raw) opf = OPF(stream) self.embedded_mi = opf.to_book_metadata() if guide is not None: @@ -828,37 +828,37 @@ class MobiReader(object): def add_anchors(self): self.log.debug('Adding anchors...') - positions = set([]) - link_pattern = re.compile(r'''<[^<>]+filepos=['"]{0,1}(\d+)[^<>]*>''', + positions = set() + link_pattern = re.compile(br'''<[^<>]+filepos=['"]{0,1}(\d+)[^<>]*>''', re.IGNORECASE) for match in link_pattern.finditer(self.mobi_html): positions.add(int(match.group(1))) pos = 0 - processed_html = cStringIO.StringIO() - end_tag_re = re.compile(r'<\s*/') + processed_html = [] + end_tag_re = re.compile(br'<\s*/') for end in sorted(positions): if end == 0: continue oend = end - l = self.mobi_html.find('<', end) - r = self.mobi_html.find('>', end) - anchor = '' + l = self.mobi_html.find(b'<', end) + r = self.mobi_html.find(b'>', end) + anchor = b'' if r > -1 and (r < l or l == end or l == -1): - p = self.mobi_html.rfind('<', 0, end + 1) + p = self.mobi_html.rfind(b'<', 0, end + 1) if (pos < end and p > -1 and not end_tag_re.match(self.mobi_html[p:r]) and - not self.mobi_html[p:r + 1].endswith('/>')): - anchor = ' filepos-id="filepos%d"' + not self.mobi_html[p:r + 1].endswith(b'/>')): + anchor = b' filepos-id="filepos%d"' end = r else: end = r + 1 - processed_html.write(self.mobi_html[pos:end] + (anchor % oend)) + processed_html.append(self.mobi_html[pos:end] + (anchor % oend)) pos = end - processed_html.write(self.mobi_html[pos:]) - processed_html = processed_html.getvalue() + processed_html.append(self.mobi_html[pos:]) + processed_html = b''.join(processed_html) # Remove anchors placed inside entities - self.processed_html = re.sub(r'&([^;]*?)()([^;]*);', - r'&\1\3;\2', processed_html) + self.processed_html = re.sub(br'&([^;]*?)()([^;]*);', + br'&\1\3;\2', processed_html) def extract_images(self, processed_records, output_dir): self.log.debug('Extracting images...')