Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)
Fix #1054 (Any2epub fails) and put in a check to detect files that have a large markup:text ratio during splitting.
This commit is contained in:
Parent: eadb7172c4
Commit: 8652cff214
@ -42,7 +42,8 @@ class Splitter(LoggingInterface):
|
|||||||
self.always_remove = always_remove
|
self.always_remove = always_remove
|
||||||
self.base = os.path.splitext(path)[0] + '_split_%d.html'
|
self.base = os.path.splitext(path)[0] + '_split_%d.html'
|
||||||
self.opts = opts
|
self.opts = opts
|
||||||
self.log_info('\tSplitting %s (%d KB)', path, os.stat(content(path)).st_size/1024.)
|
self.orig_size = os.stat(content(path)).st_size
|
||||||
|
self.log_info('\tSplitting %s (%d KB)', path, self.orig_size/1024.)
|
||||||
root = html.fromstring(open(content(path)).read())
|
root = html.fromstring(open(content(path)).read())
|
||||||
|
|
||||||
css = XPath('//link[@type = "text/css" and @rel = "stylesheet"]')(root)
|
css = XPath('//link[@type = "text/css" and @rel = "stylesheet"]')(root)
|
||||||
@ -61,8 +62,9 @@ class Splitter(LoggingInterface):
|
|||||||
self.page_breaks = []
|
self.page_breaks = []
|
||||||
if stylesheet is not None:
|
if stylesheet is not None:
|
||||||
self.find_page_breaks(stylesheet, root)
|
self.find_page_breaks(stylesheet, root)
|
||||||
|
|
||||||
self.trees = []
|
self.trees = []
|
||||||
|
self.split_size = 0
|
||||||
self.split(root.getroottree())
|
self.split(root.getroottree())
|
||||||
self.commit()
|
self.commit()
|
||||||
self.log_info('\t\tSplit into %d parts.', len(self.trees))
|
self.log_info('\t\tSplit into %d parts.', len(self.trees))
|
||||||
@ -80,7 +82,7 @@ class Splitter(LoggingInterface):
|
|||||||
self.log_debug('\t\tSplitting...')
|
self.log_debug('\t\tSplitting...')
|
||||||
root = tree.getroot()
|
root = tree.getroot()
|
||||||
split_point, before = self.find_split_point(root)
|
split_point, before = self.find_split_point(root)
|
||||||
if split_point is None:
|
if split_point is None:# or self.split_size > 6*self.orig_size:
|
||||||
if not self.always_remove:
|
if not self.always_remove:
|
||||||
self.log_warn(_('\t\tToo much markup. Re-splitting without structure preservation. This may cause incorrect rendering.'))
|
self.log_warn(_('\t\tToo much markup. Re-splitting without structure preservation. This may cause incorrect rendering.'))
|
||||||
raise SplitError(self.path, root)
|
raise SplitError(self.path, root)
|
||||||
@ -144,6 +146,7 @@ class Splitter(LoggingInterface):
|
|||||||
if size <= self.opts.profile.flow_size:
|
if size <= self.opts.profile.flow_size:
|
||||||
self.trees.append(t)
|
self.trees.append(t)
|
||||||
self.log_debug('\t\t\tCommitted sub-tree #%d (%d KB)', len(self.trees), size/1024.)
|
self.log_debug('\t\t\tCommitted sub-tree #%d (%d KB)', len(self.trees), size/1024.)
|
||||||
|
self.split_size += size
|
||||||
else:
|
else:
|
||||||
self.split(t)
|
self.split(t)
|
||||||
|
|
||||||
|
@ -350,6 +350,8 @@ class MobiReader(object):
|
|||||||
pos = 0
|
pos = 0
|
||||||
self.processed_html = ''
|
self.processed_html = ''
|
||||||
for end in positions:
|
for end in positions:
|
||||||
|
if end == 0:
|
||||||
|
continue
|
||||||
oend = end
|
oend = end
|
||||||
l = self.mobi_html.find('<', end)
|
l = self.mobi_html.find('<', end)
|
||||||
r = self.mobi_html.find('>', end)
|
r = self.mobi_html.find('>', end)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user