mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 10:44:09 -04:00
py3: Port another instance of cStringIO
This commit is contained in:
parent
ce007d8bab
commit
5e10e3663b
@ -6,7 +6,7 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import shutil, os, re, struct, textwrap, cStringIO
|
import shutil, os, re, struct, textwrap, io
|
||||||
|
|
||||||
from lxml import html, etree
|
from lxml import html, etree
|
||||||
|
|
||||||
@ -294,7 +294,7 @@ class MobiReader(object):
|
|||||||
|
|
||||||
parse_cache[htmlfile] = root
|
parse_cache[htmlfile] = root
|
||||||
self.htmlfile = htmlfile
|
self.htmlfile = htmlfile
|
||||||
ncx = cStringIO.StringIO()
|
ncx = io.BytesIO()
|
||||||
opf, ncx_manifest_entry = self.create_opf(htmlfile, guide, root)
|
opf, ncx_manifest_entry = self.create_opf(htmlfile, guide, root)
|
||||||
self.created_opf_path = os.path.splitext(htmlfile)[0] + '.opf'
|
self.created_opf_path = os.path.splitext(htmlfile)[0] + '.opf'
|
||||||
opf.render(lopen(self.created_opf_path, 'wb'), ncx,
|
opf.render(lopen(self.created_opf_path, 'wb'), ncx,
|
||||||
@ -311,7 +311,7 @@ class MobiReader(object):
|
|||||||
|
|
||||||
if self.book_header.exth is not None or self.embedded_mi is not None:
|
if self.book_header.exth is not None or self.embedded_mi is not None:
|
||||||
self.log.debug('Creating OPF...')
|
self.log.debug('Creating OPF...')
|
||||||
ncx = cStringIO.StringIO()
|
ncx = io.BytesIO()
|
||||||
opf, ncx_manifest_entry = self.create_opf(htmlfile, guide, root)
|
opf, ncx_manifest_entry = self.create_opf(htmlfile, guide, root)
|
||||||
opf.render(open(os.path.splitext(htmlfile)[0] + '.opf', 'wb'), ncx,
|
opf.render(open(os.path.splitext(htmlfile)[0] + '.opf', 'wb'), ncx,
|
||||||
ncx_manifest_entry)
|
ncx_manifest_entry)
|
||||||
@ -320,9 +320,9 @@ class MobiReader(object):
|
|||||||
write_as_utf8(os.path.splitext(htmlfile)[0] + '.ncx', ncx)
|
write_as_utf8(os.path.splitext(htmlfile)[0] + '.ncx', ncx)
|
||||||
|
|
||||||
def read_embedded_metadata(self, root, elem, guide):
|
def read_embedded_metadata(self, root, elem, guide):
|
||||||
raw = '<?xml version="1.0" encoding="utf-8" ?>\n<package>' + \
|
raw = b'<?xml version="1.0" encoding="utf-8" ?>\n<package>' + \
|
||||||
html.tostring(elem, encoding='utf-8') + '</package>'
|
html.tostring(elem, encoding='utf-8') + b'</package>'
|
||||||
stream = cStringIO.StringIO(raw)
|
stream = io.BytesIO(raw)
|
||||||
opf = OPF(stream)
|
opf = OPF(stream)
|
||||||
self.embedded_mi = opf.to_book_metadata()
|
self.embedded_mi = opf.to_book_metadata()
|
||||||
if guide is not None:
|
if guide is not None:
|
||||||
@ -828,37 +828,37 @@ class MobiReader(object):
|
|||||||
|
|
||||||
def add_anchors(self):
|
def add_anchors(self):
|
||||||
self.log.debug('Adding anchors...')
|
self.log.debug('Adding anchors...')
|
||||||
positions = set([])
|
positions = set()
|
||||||
link_pattern = re.compile(r'''<[^<>]+filepos=['"]{0,1}(\d+)[^<>]*>''',
|
link_pattern = re.compile(br'''<[^<>]+filepos=['"]{0,1}(\d+)[^<>]*>''',
|
||||||
re.IGNORECASE)
|
re.IGNORECASE)
|
||||||
for match in link_pattern.finditer(self.mobi_html):
|
for match in link_pattern.finditer(self.mobi_html):
|
||||||
positions.add(int(match.group(1)))
|
positions.add(int(match.group(1)))
|
||||||
pos = 0
|
pos = 0
|
||||||
processed_html = cStringIO.StringIO()
|
processed_html = []
|
||||||
end_tag_re = re.compile(r'<\s*/')
|
end_tag_re = re.compile(br'<\s*/')
|
||||||
for end in sorted(positions):
|
for end in sorted(positions):
|
||||||
if end == 0:
|
if end == 0:
|
||||||
continue
|
continue
|
||||||
oend = end
|
oend = end
|
||||||
l = self.mobi_html.find('<', end)
|
l = self.mobi_html.find(b'<', end)
|
||||||
r = self.mobi_html.find('>', end)
|
r = self.mobi_html.find(b'>', end)
|
||||||
anchor = '<a id="filepos%d"></a>'
|
anchor = b'<a id="filepos%d"></a>'
|
||||||
if r > -1 and (r < l or l == end or l == -1):
|
if r > -1 and (r < l or l == end or l == -1):
|
||||||
p = self.mobi_html.rfind('<', 0, end + 1)
|
p = self.mobi_html.rfind(b'<', 0, end + 1)
|
||||||
if (pos < end and p > -1 and not end_tag_re.match(self.mobi_html[p:r]) and
|
if (pos < end and p > -1 and not end_tag_re.match(self.mobi_html[p:r]) and
|
||||||
not self.mobi_html[p:r + 1].endswith('/>')):
|
not self.mobi_html[p:r + 1].endswith(b'/>')):
|
||||||
anchor = ' filepos-id="filepos%d"'
|
anchor = b' filepos-id="filepos%d"'
|
||||||
end = r
|
end = r
|
||||||
else:
|
else:
|
||||||
end = r + 1
|
end = r + 1
|
||||||
processed_html.write(self.mobi_html[pos:end] + (anchor % oend))
|
processed_html.append(self.mobi_html[pos:end] + (anchor % oend))
|
||||||
pos = end
|
pos = end
|
||||||
processed_html.write(self.mobi_html[pos:])
|
processed_html.append(self.mobi_html[pos:])
|
||||||
processed_html = processed_html.getvalue()
|
processed_html = b''.join(processed_html)
|
||||||
|
|
||||||
# Remove anchors placed inside entities
|
# Remove anchors placed inside entities
|
||||||
self.processed_html = re.sub(r'&([^;]*?)(<a id="filepos\d+"></a>)([^;]*);',
|
self.processed_html = re.sub(br'&([^;]*?)(<a id="filepos\d+"></a>)([^;]*);',
|
||||||
r'&\1\3;\2', processed_html)
|
br'&\1\3;\2', processed_html)
|
||||||
|
|
||||||
def extract_images(self, processed_records, output_dir):
|
def extract_images(self, processed_records, output_dir):
|
||||||
self.log.debug('Extracting images...')
|
self.log.debug('Extracting images...')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user