mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix detection of top of a page and change handling of <a name> elements to prevent reader slowdowns.
This commit is contained in:
parent
c44bd5ab55
commit
21af20d036
@ -31,7 +31,7 @@ from libprs500.lrf.html.BeautifulSoup import BeautifulSoup, Comment, Tag, \
|
|||||||
NavigableString, Declaration, ProcessingInstruction
|
NavigableString, Declaration, ProcessingInstruction
|
||||||
from libprs500.lrf.pylrs.pylrs import Paragraph, CR, Italic, ImageStream, TextBlock, \
|
from libprs500.lrf.pylrs.pylrs import Paragraph, CR, Italic, ImageStream, TextBlock, \
|
||||||
ImageBlock, JumpButton, CharButton, BlockStyle,\
|
ImageBlock, JumpButton, CharButton, BlockStyle,\
|
||||||
Page, Bold, Space, Plot, TextStyle, Image
|
Page, Bold, Space, Plot, TextStyle, Image, BlockSpace
|
||||||
from libprs500.lrf.pylrs.pylrs import Span as _Span
|
from libprs500.lrf.pylrs.pylrs import Span as _Span
|
||||||
from libprs500.lrf import ConversionError, option_parser, Book
|
from libprs500.lrf import ConversionError, option_parser, Book
|
||||||
from libprs500 import extract
|
from libprs500 import extract
|
||||||
@ -377,15 +377,16 @@ class HTMLConverter(object):
|
|||||||
self.current_block.append_to(self.current_page)
|
self.current_block.append_to(self.current_page)
|
||||||
if self.current_page and self.current_page.get_text().strip():
|
if self.current_page and self.current_page.get_text().strip():
|
||||||
self.book.append(self.current_page)
|
self.book.append(self.current_page)
|
||||||
previous = self.current_page
|
|
||||||
|
|
||||||
if not self.top.parent:
|
if not self.top.parent:
|
||||||
if not previous:
|
if not previous:
|
||||||
previous = self.current_page
|
self.top = self.book.pages()[0].contents[0]
|
||||||
|
else:
|
||||||
found = False
|
found = False
|
||||||
for page in self.book.pages():
|
for page in self.book.pages():
|
||||||
if page == previous:
|
if page == previous:
|
||||||
found = True
|
found = True
|
||||||
|
continue
|
||||||
if found:
|
if found:
|
||||||
self.top = page.contents[0]
|
self.top = page.contents[0]
|
||||||
break
|
break
|
||||||
@ -588,37 +589,33 @@ class HTMLConverter(object):
|
|||||||
pass
|
pass
|
||||||
elif tagname == 'a' and self.max_link_levels >= 0:
|
elif tagname == 'a' and self.max_link_levels >= 0:
|
||||||
if tag.has_key('name'):
|
if tag.has_key('name'):
|
||||||
self.current_para.append_to(self.current_block)
|
|
||||||
self.current_block.append_to(self.current_page)
|
|
||||||
previous = self.current_block
|
previous = self.current_block
|
||||||
tb = TextBlock()
|
|
||||||
self.current_block = tb
|
|
||||||
self.current_para = Paragraph()
|
|
||||||
self.targets[tag['name']] = tb
|
|
||||||
self.process_children(tag, tag_css)
|
self.process_children(tag, tag_css)
|
||||||
if tb.parent == None:
|
target = None
|
||||||
if self.current_block == tb:
|
if self.current_block == previous:
|
||||||
self.current_para.append_to(self.current_block)
|
self.current_para.append_to(self.current_block)
|
||||||
self.current_para = Paragraph()
|
self.current_para = Paragraph()
|
||||||
if not self.current_block.get_text().strip():
|
if self.current_block.get_text().strip():
|
||||||
# This is necessary as apparently the reader
|
target = self.current_block
|
||||||
# cannot handle empty TextBlocks, although
|
|
||||||
# the Connect software displays them correctly
|
|
||||||
mkr = TextBlock()
|
|
||||||
mkr.append(Paragraph(text=' '))
|
|
||||||
self.current_page.append(mkr)
|
|
||||||
#self.current_page.append(self.current_block)
|
|
||||||
#self.current_block = TextBlock()
|
|
||||||
else:
|
else:
|
||||||
found, marked = False, False
|
target = BlockSpace()
|
||||||
|
self.current_page.append(target)
|
||||||
|
else:
|
||||||
|
found = False
|
||||||
for item in self.current_page.contents:
|
for item in self.current_page.contents:
|
||||||
if item == previous:
|
if item == previous:
|
||||||
found = True
|
found = True
|
||||||
if found and isinstance(item, TextBlock):
|
continue
|
||||||
self.targets[tag['name']] = item
|
if found:
|
||||||
marked = True
|
target = item
|
||||||
if not marked:
|
break
|
||||||
self.current_page.append(tb)
|
if target == None:
|
||||||
|
if self.current_block.get_text().strip():
|
||||||
|
target = self.current_block
|
||||||
|
else:
|
||||||
|
target = BlockSpace()
|
||||||
|
self.current_page.append(target)
|
||||||
|
self.targets[tag['name']] = target
|
||||||
elif tag.has_key('href'):
|
elif tag.has_key('href'):
|
||||||
purl = urlparse(tag['href'])
|
purl = urlparse(tag['href'])
|
||||||
path = purl[2]
|
path = purl[2]
|
||||||
@ -699,9 +696,13 @@ class HTMLConverter(object):
|
|||||||
elif tagname in ['ul', 'ol']:
|
elif tagname in ['ul', 'ol']:
|
||||||
self.in_ol = 1 if tagname == 'ol' else 0
|
self.in_ol = 1 if tagname == 'ol' else 0
|
||||||
self.end_current_para()
|
self.end_current_para()
|
||||||
|
self.current_block.append_to(self.current_page)
|
||||||
|
self.current_block = TextBlock()
|
||||||
self.process_children(tag, tag_css)
|
self.process_children(tag, tag_css)
|
||||||
self.in_ol = 0
|
self.in_ol = 0
|
||||||
self.end_current_para()
|
self.end_current_para()
|
||||||
|
self.current_block.append_to(self.current_page)
|
||||||
|
self.current_block = TextBlock()
|
||||||
elif tagname == 'li':
|
elif tagname == 'li':
|
||||||
prepend = str(self.in_ol)+'. ' if self.in_ol else u'\u2022' + ' '
|
prepend = str(self.in_ol)+'. ' if self.in_ol else u'\u2022' + ' '
|
||||||
if self.current_para.get_text().strip():
|
if self.current_para.get_text().strip():
|
||||||
|
@ -1791,9 +1791,10 @@ class Bold(Span):
|
|||||||
return e
|
return e
|
||||||
|
|
||||||
|
|
||||||
class BlockSpace(LrsContainer):
|
class BlockSpace(LrsContainer, LrsObject):
|
||||||
""" Can be appended to a page to move the text point. """
|
""" Can be appended to a page to move the text point. """
|
||||||
def __init__(self, xspace=0, yspace=0, x=0, y=0):
|
def __init__(self, xspace=0, yspace=0, x=0, y=0):
|
||||||
|
LrsObject.__init__(self)
|
||||||
LrsContainer.__init__(self, [])
|
LrsContainer.__init__(self, [])
|
||||||
if xspace == 0 and x != 0: xspace = x
|
if xspace == 0 and x != 0: xspace = x
|
||||||
if yspace == 0 and y != 0: yspace = y
|
if yspace == 0 and y != 0: yspace = y
|
||||||
|
Loading…
x
Reference in New Issue
Block a user