mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Pull from trunk
This commit is contained in:
commit
536eb02887
@ -294,6 +294,9 @@ def xml2str(root, pretty_print=False, strip_comments=False):
|
|||||||
def xml2unicode(root, pretty_print=False):
|
def xml2unicode(root, pretty_print=False):
|
||||||
return etree.tostring(root, pretty_print=pretty_print)
|
return etree.tostring(root, pretty_print=pretty_print)
|
||||||
|
|
||||||
|
def xml2text(elem):
|
||||||
|
return etree.tostring(elem, method='text', encoding=unicode, with_tail=False)
|
||||||
|
|
||||||
ASCII_CHARS = set(chr(x) for x in xrange(128))
|
ASCII_CHARS = set(chr(x) for x in xrange(128))
|
||||||
UNIBYTE_CHARS = set(chr(x) for x in xrange(256))
|
UNIBYTE_CHARS = set(chr(x) for x in xrange(256))
|
||||||
URL_SAFE = set('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
|
URL_SAFE = set('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
|
||||||
|
@ -5,7 +5,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
|
|
||||||
'''
|
'''
|
||||||
Splitting of the XHTML flows. Splitting can happen on page boundaries or can be
|
Splitting of the XHTML flows. Splitting can happen on page boundaries or can be
|
||||||
forces at "likely" locations to conform to size limitations. This transform
|
forced at "likely" locations to conform to size limitations. This transform
|
||||||
assumes a prior call to the flatcss transform.
|
assumes a prior call to the flatcss transform.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
@ -385,12 +385,18 @@ class FlowSplitter(object):
|
|||||||
raise SplitError(self.item.href, root)
|
raise SplitError(self.item.href, root)
|
||||||
self.log.debug('\t\t\tSplit point:', split_point.tag, tree.getpath(split_point))
|
self.log.debug('\t\t\tSplit point:', split_point.tag, tree.getpath(split_point))
|
||||||
|
|
||||||
for t in self.do_split(tree, split_point, before):
|
trees = self.do_split(tree, split_point, before)
|
||||||
|
sizes = [len(tostring(t.getroot())) for t in trees]
|
||||||
|
if min(sizes) < 5*1024:
|
||||||
|
self.log.debug('\t\t\tSplit tree too small')
|
||||||
|
self.split_to_size(tree)
|
||||||
|
return
|
||||||
|
|
||||||
|
for t, size in zip(trees, sizes):
|
||||||
r = t.getroot()
|
r = t.getroot()
|
||||||
if self.is_page_empty(r):
|
if self.is_page_empty(r):
|
||||||
continue
|
continue
|
||||||
size = len(tostring(r))
|
elif size <= self.max_flow_size:
|
||||||
if size <= self.max_flow_size:
|
|
||||||
self.split_trees.append(t)
|
self.split_trees.append(t)
|
||||||
self.log.debug(
|
self.log.debug(
|
||||||
'\t\t\tCommitted sub-tree #%d (%d KB)'%(
|
'\t\t\tCommitted sub-tree #%d (%d KB)'%(
|
||||||
|
@ -11,7 +11,7 @@ import re
|
|||||||
from lxml import etree
|
from lxml import etree
|
||||||
from urlparse import urlparse
|
from urlparse import urlparse
|
||||||
|
|
||||||
from calibre.ebooks.oeb.base import XPNSMAP, TOC, XHTML
|
from calibre.ebooks.oeb.base import XPNSMAP, TOC, XHTML, xml2text
|
||||||
from calibre.ebooks import ConversionError
|
from calibre.ebooks import ConversionError
|
||||||
|
|
||||||
def XPath(x):
|
def XPath(x):
|
||||||
@ -79,8 +79,7 @@ class DetectStructure(object):
|
|||||||
page_break_before = 'display: block; page-break-before: always'
|
page_break_before = 'display: block; page-break-before: always'
|
||||||
page_break_after = 'display: block; page-break-after: always'
|
page_break_after = 'display: block; page-break-after: always'
|
||||||
for item, elem in self.detected_chapters:
|
for item, elem in self.detected_chapters:
|
||||||
text = u' '.join([t.strip() for t in elem.xpath('descendant::text()')])
|
text = xml2text(elem).strip()
|
||||||
text = text.strip()
|
|
||||||
self.log('\tDetected chapter:', text[:50])
|
self.log('\tDetected chapter:', text[:50])
|
||||||
if chapter_mark == 'none':
|
if chapter_mark == 'none':
|
||||||
continue
|
continue
|
||||||
@ -120,8 +119,7 @@ class DetectStructure(object):
|
|||||||
if frag:
|
if frag:
|
||||||
href = '#'.join((href, frag))
|
href = '#'.join((href, frag))
|
||||||
if not self.oeb.toc.has_href(href):
|
if not self.oeb.toc.has_href(href):
|
||||||
text = u' '.join([t.strip() for t in \
|
text = xml2text(a)
|
||||||
a.xpath('descendant::text()')])
|
|
||||||
text = text[:100].strip()
|
text = text[:100].strip()
|
||||||
if not self.oeb.toc.has_text(text):
|
if not self.oeb.toc.has_text(text):
|
||||||
num += 1
|
num += 1
|
||||||
@ -135,7 +133,7 @@ class DetectStructure(object):
|
|||||||
|
|
||||||
|
|
||||||
def elem_to_link(self, item, elem, counter):
|
def elem_to_link(self, item, elem, counter):
|
||||||
text = u' '.join([t.strip() for t in elem.xpath('descendant::text()')])
|
text = xml2text(elem)
|
||||||
text = text[:100].strip()
|
text = text[:100].strip()
|
||||||
id = elem.get('id', 'calibre_toc_%d'%counter)
|
id = elem.get('id', 'calibre_toc_%d'%counter)
|
||||||
elem.set('id', id)
|
elem.set('id', id)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user