mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Improve handling of images referenced in <a> tags and place large images in an image block
This commit is contained in:
parent
44a50922cd
commit
05c1c36719
@ -36,7 +36,7 @@ from libprs500.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, \
|
|||||||
Comment, Tag, NavigableString, Declaration, ProcessingInstruction
|
Comment, Tag, NavigableString, Declaration, ProcessingInstruction
|
||||||
from libprs500.ebooks.lrf.pylrs.pylrs import Paragraph, CR, Italic, ImageStream, \
|
from libprs500.ebooks.lrf.pylrs.pylrs import Paragraph, CR, Italic, ImageStream, \
|
||||||
TextBlock, ImageBlock, JumpButton, CharButton, Bold, Space, \
|
TextBlock, ImageBlock, JumpButton, CharButton, Bold, Space, \
|
||||||
Plot, Image, BlockSpace, RuledLine, BookSetting
|
Plot, Image, BlockSpace, RuledLine, BookSetting, Canvas
|
||||||
from libprs500.ebooks.lrf.pylrs.pylrs import Span as _Span
|
from libprs500.ebooks.lrf.pylrs.pylrs import Span as _Span
|
||||||
from libprs500.ebooks.lrf import ConversionError, option_parser, Book, PRS500_PROFILE
|
from libprs500.ebooks.lrf import ConversionError, option_parser, Book, PRS500_PROFILE
|
||||||
from libprs500 import extract, filename_to_utf8
|
from libprs500 import extract, filename_to_utf8
|
||||||
@ -217,8 +217,10 @@ class HTMLConverter(object):
|
|||||||
PAGE_BREAK_PAT = re.compile(r'page-break-(?:after|before)\s*:\s*(\w+)', re.IGNORECASE)
|
PAGE_BREAK_PAT = re.compile(r'page-break-(?:after|before)\s*:\s*(\w+)', re.IGNORECASE)
|
||||||
IGNORED_TAGS = (Comment, Declaration, ProcessingInstruction)
|
IGNORED_TAGS = (Comment, Declaration, ProcessingInstruction)
|
||||||
# Fix <a /> elements
|
# Fix <a /> elements
|
||||||
MARKUP_MASSAGE = [(re.compile("(<\s*[aA]\s+.*\/)\s*>"),
|
MARKUP_MASSAGE = [(re.compile("(<\s*[aA]\s+.*\/)\s*>"), #Close <a /> tags
|
||||||
lambda match: match.group(1)+"></a>"),
|
lambda match: match.group(1)+"></a>"),
|
||||||
|
# Strip comments from <style> tags. This is needed as
|
||||||
|
# sometimes there are unterminated comments
|
||||||
(re.compile(r"<\s*style.*?>.*?(<\!--).*?<.\s*style\s*>", re.DOTALL|re.IGNORECASE),
|
(re.compile(r"<\s*style.*?>.*?(<\!--).*?<.\s*style\s*>", re.DOTALL|re.IGNORECASE),
|
||||||
lambda match: match.group().replace('<!--', '').replace('-->', '')),
|
lambda match: match.group().replace('<!--', '').replace('-->', '')),
|
||||||
]
|
]
|
||||||
@ -442,22 +444,20 @@ class HTMLConverter(object):
|
|||||||
if not self.top.parent:
|
if not self.top.parent:
|
||||||
if not previous:
|
if not previous:
|
||||||
try:
|
try:
|
||||||
previous = get_valid_block(self.book.pages()[0])
|
previous = self.book.pages()[0]
|
||||||
except IndexError:
|
except IndexError:
|
||||||
previous = self.current_page
|
raise ConversionError, self.file_name + ' does not seem to have any content'
|
||||||
else:
|
|
||||||
found = False
|
found = False
|
||||||
for page in self.book.pages():
|
for page in self.book.pages():
|
||||||
if page == previous:
|
if page == previous:
|
||||||
found = True
|
found = True
|
||||||
|
continue
|
||||||
|
if found:
|
||||||
|
self.top = get_valid_block(page)
|
||||||
|
if not self.top:
|
||||||
continue
|
continue
|
||||||
if found:
|
break
|
||||||
self.top = get_valid_block(page)
|
|
||||||
if not self.top:
|
|
||||||
continue
|
|
||||||
break
|
|
||||||
if not self.top.parent:
|
|
||||||
self.top = get_valid_block(self.current_page)
|
|
||||||
if not self.top or not self.top.parent:
|
if not self.top or not self.top.parent:
|
||||||
raise ConversionError, 'Could not parse ' + self.file_name
|
raise ConversionError, 'Could not parse ' + self.file_name
|
||||||
|
|
||||||
@ -622,7 +622,8 @@ class HTMLConverter(object):
|
|||||||
textheight=self.profile.screen_height)
|
textheight=self.profile.screen_height)
|
||||||
if not self.images.has_key(path):
|
if not self.images.has_key(path):
|
||||||
self.images[path] = ImageStream(path)
|
self.images[path] = ImageStream(path)
|
||||||
page.append(ImageBlock(self.images[path]))
|
ib = ImageBlock(self.images[path])
|
||||||
|
page.append(ib)
|
||||||
self.book.append(page)
|
self.book.append(page)
|
||||||
|
|
||||||
def process_children(self, ptag, pcss):
|
def process_children(self, ptag, pcss):
|
||||||
@ -725,6 +726,69 @@ class HTMLConverter(object):
|
|||||||
self.current_block = self.book.create_text_block(textStyle=self.current_block.textStyle,
|
self.current_block = self.book.create_text_block(textStyle=self.current_block.textStyle,
|
||||||
blockStyle=self.current_block.blockStyle)
|
blockStyle=self.current_block.blockStyle)
|
||||||
|
|
||||||
|
def process_image(self, path, tag_css, width=None, height=None):
    '''
    Add the image at C{path} to the book, choosing its placement from its
    (possibly scaled) size:

      - if the larger dimension is at most one fifth of the smaller page
        dimension, the image is appended inline to the current paragraph;
      - otherwise, if its height is at most two thirds of the page height,
        it is placed in a text block of its own;
      - otherwise the current page is ended and the image is put in an
        ImageBlock on a Canvas, horizontally centered.

    Images larger than the page are resized to fit and saved to a
    temporary JPEG file, which is remembered in C{self.scaled_images}
    under the path it was scaled from.

    @param path: Path to the image file
    @param tag_css: CSS of the referencing tag, passed to
                    C{self.process_alignment}
    @param width: Requested width in pixels. If either C{width} or
                  C{height} is None, the size of the image file is used
                  for both.
    @param height: Requested height in pixels, see C{width}
    '''
    def scale_image(width, height):
        # C{path} and C{pil_image} are looked up in the enclosing scope when
        # this is called, i.e. path is still the file being scaled.
        pt = PersistentTemporaryFile(suffix='.jpeg')
        pil_image.resize((int(width), int(height)),
                         PILImage.ANTIALIAS).convert('RGB').save(pt, 'JPEG')
        pt.close()
        self.scaled_images[path] = pt
        return pt.name

    # Reuse a previously scaled copy of this image if there is one
    if self.scaled_images.has_key(path):
        path = self.scaled_images[path].name

    pil_image = PILImage.open(path)
    if width is None or height is None:
        width, height = pil_image.size

    page_width = self.profile.page_width
    page_height = self.profile.page_height

    # Fit the image to the page while keeping its aspect ratio. Shrinking
    # by height first and then by width gives the same result as the two
    # mirrored branches this replaces: a width correction can only reduce
    # the height further, so no second height check is needed.
    needs_scaling = False
    if height > page_height:
        corrf = page_height/(1.*height)
        width, height = floor(corrf*width), page_height-1
        needs_scaling = True
    if width > page_width:
        corrf = page_width/(1.*width)
        width, height = page_width-1, floor(corrf*height)
        needs_scaling = True
    if needs_scaling:
        path = scale_image(width, height)
    width, height = int(width), int(height)

    if not self.images.has_key(path):
        self.images[path] = ImageStream(path)

    # Kept separate from pil_image so the name used by scale_image is
    # never rebound to a different kind of object.
    image = Image(self.images[path], x0=0, y0=0, x1=width, y1=height,
                  xsize=width, ysize=height)
    factor = 720./self.profile.dpi

    self.process_alignment(tag_css)

    if max(width, height) <= min(page_width, page_height)/5.:
        # Small image: keep it inline with the surrounding text
        self.current_para.append(Plot(image, xsize=ceil(width*factor),
                                      ysize=ceil(height*factor)))
    elif height <= int(floor((2/3.)*page_height)):
        # Medium image: give it a block of its own, then continue in a
        # new text block that has the styles of the interrupted one
        pb = self.current_block
        self.end_current_para()
        self.process_alignment(tag_css)
        self.current_para.append(Plot(image, xsize=width*factor,
                                      ysize=height*factor))
        self.current_block.append(self.current_para)
        self.current_page.append(self.current_block)
        self.current_block = self.book.create_text_block(
                                            textStyle=pb.textStyle,
                                            blockStyle=pb.blockStyle)
        self.current_para = Paragraph()
    else:
        # Large image: put it on a canvas after ending the current page.
        # The canvas is referenced directly instead of through
        # current_page.contents[0], so this does not depend on the page
        # being empty after end_page().
        self.end_page()
        canvas = Canvas(width=page_width, height=height)
        self.current_page.append(canvas)
        left = int(floor((page_width - width)/2.))
        canvas.put_object(ImageBlock(self.images[path]), left, 0)
|
||||||
|
|
||||||
def parse_tag(self, tag, parent_css):
|
def parse_tag(self, tag, parent_css):
|
||||||
try:
|
try:
|
||||||
tagname = tag.name.lower()
|
tagname = tag.name.lower()
|
||||||
@ -798,73 +862,21 @@ class HTMLConverter(object):
|
|||||||
path = unquote(purl[2])
|
path = unquote(purl[2])
|
||||||
if path and os.path.splitext(path)[1][1:].lower() in \
|
if path and os.path.splitext(path)[1][1:].lower() in \
|
||||||
['png', 'jpg', 'bmp', 'jpeg']:
|
['png', 'jpg', 'bmp', 'jpeg']:
|
||||||
self.add_image_page(path)
|
self.process_image(path, tag_css)
|
||||||
else:
|
else:
|
||||||
self.add_text('Link: ' + tag['href'], tag_css)
|
self.add_text('Link: ' + tag['href'], tag_css)
|
||||||
self.links.append(HTMLConverter.Link(self.current_para.contents[-1], tag))
|
self.links.append(HTMLConverter.Link(self.current_para.contents[-1], tag))
|
||||||
elif tagname == 'img':
|
elif tagname == 'img':
|
||||||
if tag.has_key('src') and os.access(unquote(tag['src']), os.R_OK):
|
if tag.has_key('src') and os.access(unquote(tag['src']), os.R_OK):
|
||||||
path = os.path.abspath(unquote(tag['src']))
|
path = os.path.abspath(unquote(tag['src']))
|
||||||
if self.scaled_images.has_key(path):
|
width, height = None, None
|
||||||
path = self.scaled_images[path].name
|
|
||||||
im = PILImage.open(path)
|
|
||||||
width, height = im.size
|
|
||||||
try:
|
try:
|
||||||
width = int(tag['width'])
|
width = int(tag['width'])
|
||||||
height = int(tag['height'])
|
height = int(tag['height'])
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
self.process_image(path, tag_css, width, height)
|
||||||
def scale_image(width, height):
|
|
||||||
pt = PersistentTemporaryFile(suffix='.jpeg')
|
|
||||||
im.resize((int(width), int(height)), PILImage.ANTIALIAS).convert('RGB').save(pt, 'JPEG')
|
|
||||||
pt.close()
|
|
||||||
self.scaled_images[path] = pt
|
|
||||||
return pt.name
|
|
||||||
|
|
||||||
|
|
||||||
if height > self.profile.page_height:
|
|
||||||
corrf = self.profile.page_height/(1.*height)
|
|
||||||
width, height = floor(corrf*width), self.profile.page_height-1
|
|
||||||
if width > self.profile.page_width:
|
|
||||||
corrf = (self.profile.page_width)/(1.*width)
|
|
||||||
width, height = self.profile.page_width-1, floor(corrf*height)
|
|
||||||
path = scale_image(width, height)
|
|
||||||
if width > self.profile.page_width:
|
|
||||||
corrf = self.profile.page_width/(1.*width)
|
|
||||||
width, height = self.profile.page_width-1, floor(corrf*height)
|
|
||||||
if height > self.profile.page_height:
|
|
||||||
corrf = (self.profile.page_height)/(1.*height)
|
|
||||||
width, height = floor(corrf*width), self.profile.page_height-1
|
|
||||||
path = scale_image(width, height)
|
|
||||||
width, height = int(width), int(height)
|
|
||||||
|
|
||||||
if not self.images.has_key(path):
|
|
||||||
self.images[path] = ImageStream(path)
|
|
||||||
factor = 720./self.profile.dpi
|
|
||||||
|
|
||||||
self.process_alignment(tag_css)
|
|
||||||
|
|
||||||
if max(width, height) <= min(self.profile.page_width,
|
|
||||||
self.profile.page_height)/5.:
|
|
||||||
im = Image(self.images[path], x0=0, y0=0, x1=width, y1=height,\
|
|
||||||
xsize=width, ysize=height)
|
|
||||||
self.current_para.append(Plot(im, xsize=ceil(width*factor),
|
|
||||||
ysize=ceil(height*factor)))
|
|
||||||
else:
|
|
||||||
pb = self.current_block
|
|
||||||
self.end_current_para()
|
|
||||||
self.process_alignment(tag_css)
|
|
||||||
im = Image(self.images[path], x0=0, y0=0, x1=width, y1=height,\
|
|
||||||
xsize=width, ysize=height)
|
|
||||||
self.current_para.append(Plot(im, xsize=width*factor,
|
|
||||||
ysize=height*factor))
|
|
||||||
self.current_block.append(self.current_para)
|
|
||||||
self.current_page.append(self.current_block)
|
|
||||||
self.current_block = self.book.create_text_block(
|
|
||||||
textStyle=pb.textStyle,
|
|
||||||
blockStyle=pb.blockStyle)
|
|
||||||
self.current_para = Paragraph()
|
|
||||||
else:
|
else:
|
||||||
print >>sys.stderr, "Failed to process:", tag
|
print >>sys.stderr, "Failed to process:", tag
|
||||||
elif tagname in ['style', 'link']:
|
elif tagname in ['style', 'link']:
|
||||||
@ -998,6 +1010,9 @@ class HTMLConverter(object):
|
|||||||
elif tagname in ['br', 'tr']:
|
elif tagname in ['br', 'tr']:
|
||||||
self.current_para.append(CR())
|
self.current_para.append(CR())
|
||||||
self.process_children(tag, tag_css)
|
self.process_children(tag, tag_css)
|
||||||
|
elif tagname in ['td']:
|
||||||
|
self.current_para.append(' ')
|
||||||
|
self.process_children(tag, tag_css)
|
||||||
elif tagname == 'hr':
|
elif tagname == 'hr':
|
||||||
self.end_current_para()
|
self.end_current_para()
|
||||||
self.current_block.append(CR())
|
self.current_block.append(CR())
|
||||||
@ -1208,6 +1223,9 @@ def main():
|
|||||||
try:
|
try:
|
||||||
options, args, parser = parse_options()
|
options, args, parser = parse_options()
|
||||||
src = args[0]
|
src = args[0]
|
||||||
|
if options.verbose:
|
||||||
|
import warnings
|
||||||
|
warnings.defaultaction = 'error'
|
||||||
except:
|
except:
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
process_file(src, options)
|
process_file(src, options)
|
||||||
|
@ -70,7 +70,7 @@
|
|||||||
|
|
||||||
<h2><a name='images'>Inline images</a></h2>
|
<h2><a name='images'>Inline images</a></h2>
|
||||||
<p>
|
<p>
|
||||||
Here I demonstrate the use of inline images in the midst of text. Here is a small image <img src='small.jpg' /> embedded in a sentence. Now we have a slightly larger image that is automatically put in its own block <img style="text-align:center" src='medium.jpg' /> and finally we have a large image which wont fit on this page. Try changing sizes from S to M to L and see how the images behave. <img align='center' src='large.jpg' />
|
Here I demonstrate the use of inline images in the midst of text. Here is a small image <img src='small.jpg' /> embedded in a sentence. Now we have a slightly larger image that is automatically put in its own block <img style="text-align:center" src='medium.jpg' /> and finally we have a large image which is put on a page by itself. Try changing sizes from S to M to L and see how the images behave. <img align='center' src='large.jpg' />
|
||||||
</p>
|
</p>
|
||||||
<p class='toc'>
|
<p class='toc'>
|
||||||
<hr />
|
<hr />
|
||||||
|
@ -144,7 +144,7 @@ class Delegator(object):
|
|||||||
d.parent = self
|
d.parent = self
|
||||||
methods = d.getMethods()
|
methods = d.getMethods()
|
||||||
self.delegatedMethods += methods
|
self.delegatedMethods += methods
|
||||||
for m in methods:
|
for m in methods:
|
||||||
setattr(self, m, getattr(d, m))
|
setattr(self, m, getattr(d, m))
|
||||||
|
|
||||||
"""
|
"""
|
||||||
@ -247,6 +247,7 @@ class LrsContainer(object):
|
|||||||
self.parent = None
|
self.parent = None
|
||||||
self.contents = []
|
self.contents = []
|
||||||
self.validChildren = validChildren
|
self.validChildren = validChildren
|
||||||
|
self.must_append = False
|
||||||
|
|
||||||
|
|
||||||
def has_text(self):
|
def has_text(self):
|
||||||
@ -259,7 +260,7 @@ class LrsContainer(object):
|
|||||||
if child.has_text():
|
if child.has_text():
|
||||||
return True
|
return True
|
||||||
for item in self.contents:
|
for item in self.contents:
|
||||||
if isinstance(item, (Plot, ImageBlock)):
|
if isinstance(item, (Plot, ImageBlock, Canvas)):
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@ -268,7 +269,7 @@ class LrsContainer(object):
|
|||||||
Append self to C{parent} iff self has non whitespace textual content
|
Append self to C{parent} iff self has non whitespace textual content
|
||||||
@type parent: LrsContainer
|
@type parent: LrsContainer
|
||||||
'''
|
'''
|
||||||
if self.has_text():
|
if self.has_text() or self.must_append:
|
||||||
parent.append(self)
|
parent.append(self)
|
||||||
|
|
||||||
|
|
||||||
@ -425,7 +426,7 @@ class Book(Delegator):
|
|||||||
LrsObject.nextObjId += 1
|
LrsObject.nextObjId += 1
|
||||||
|
|
||||||
Delegator.__init__(self, [BookInformation(), Main(),
|
Delegator.__init__(self, [BookInformation(), Main(),
|
||||||
Template(), Style(), Solos(), Objects()])
|
Template(), Style(), Solos(), Objects()])
|
||||||
|
|
||||||
self.sourceencoding = None
|
self.sourceencoding = None
|
||||||
|
|
||||||
@ -506,7 +507,7 @@ class Book(Delegator):
|
|||||||
className = content.__class__.__name__
|
className = content.__class__.__name__
|
||||||
try:
|
try:
|
||||||
method = getattr(self, "append" + className)
|
method = getattr(self, "append" + className)
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
raise LrsError, "can't append %s to Book" % className
|
raise LrsError, "can't append %s to Book" % className
|
||||||
|
|
||||||
method(content)
|
method(content)
|
||||||
@ -1870,24 +1871,28 @@ class CharButton(LrsSimpleChar1, LrsContainer):
|
|||||||
class Objects(LrsContainer):
|
class Objects(LrsContainer):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
LrsContainer.__init__(self, [JumpButton, TextBlock, HeaderOrFooter,
|
LrsContainer.__init__(self, [JumpButton, TextBlock, HeaderOrFooter,
|
||||||
ImageStream, Image])
|
ImageStream, Image, ImageBlock])
|
||||||
self.appendJumpButton = self.appendTextBlock = self.appendHeader = \
|
self.appendJumpButton = self.appendTextBlock = self.appendHeader = \
|
||||||
self.appendFooter = self.appendImageStream = \
|
self.appendFooter = self.appendImageStream = \
|
||||||
self.appendImage = self.append
|
self.appendImage = self.appendImageBlock = self.append
|
||||||
|
|
||||||
|
|
||||||
def getMethods(self):
|
def getMethods(self):
|
||||||
return ["JumpButton", "appendJumpButton", "TextBlock",
|
return ["JumpButton", "appendJumpButton", "TextBlock",
|
||||||
"appendTextBlock", "Header", "appendHeader",
|
"appendTextBlock", "Header", "appendHeader",
|
||||||
"Footer", "appendFooter",
|
"Footer", "appendFooter", "ImageBlock",
|
||||||
"ImageStream", "appendImageStream",
|
"ImageStream", "appendImageStream",
|
||||||
'Image','appendImage']
|
'Image','appendImage', 'appendImageBlock']
|
||||||
|
|
||||||
|
|
||||||
def getSettings(self):
|
def getSettings(self):
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
def ImageBlock(self, *args, **kwargs):
    '''
    Create an ImageBlock from the given arguments and append it to this
    container.

    @return: The newly created ImageBlock
    '''
    block = ImageBlock(*args, **kwargs)
    self.append(block)
    return block
|
||||||
|
|
||||||
def JumpButton(self, textBlock):
|
def JumpButton(self, textBlock):
|
||||||
b = JumpButton(textBlock)
|
b = JumpButton(textBlock)
|
||||||
@ -2062,8 +2067,8 @@ class Canvas(LrsObject, LrsContainer, LrsAttributes):
|
|||||||
|
|
||||||
self.settings = self.defaults.copy()
|
self.settings = self.defaults.copy()
|
||||||
self.settings.update(settings)
|
self.settings.update(settings)
|
||||||
self.settings['canvasheight'] = height
|
self.settings['canvasheight'] = int(height)
|
||||||
self.settings['canvaswidth'] = width
|
self.settings['canvaswidth'] = int(width)
|
||||||
|
|
||||||
def put_object(self, obj, x, y):
|
def put_object(self, obj, x, y):
|
||||||
self.append(PutObj(obj, x=x, y=y))
|
self.append(PutObj(obj, x=x, y=y))
|
||||||
@ -2086,7 +2091,7 @@ class Canvas(LrsObject, LrsContainer, LrsAttributes):
|
|||||||
content.toLrfContainer(lrfWriter, stream)
|
content.toLrfContainer(lrfWriter, stream)
|
||||||
if lrfWriter.saveStreamTags: # true only if testing
|
if lrfWriter.saveStreamTags: # true only if testing
|
||||||
c.saveStreamTags = stream.tags
|
c.saveStreamTags = stream.tags
|
||||||
|
|
||||||
c.appendLrfTags(
|
c.appendLrfTags(
|
||||||
stream.getStreamTags(lrfWriter.getSourceEncoding(),
|
stream.getStreamTags(lrfWriter.getSourceEncoding(),
|
||||||
optimizeTags=lrfWriter.optimizeTags,
|
optimizeTags=lrfWriter.optimizeTags,
|
||||||
@ -2102,8 +2107,8 @@ class PutObj(LrsContainer):
|
|||||||
def __init__(self, content, x=0, y=0):
|
def __init__(self, content, x=0, y=0):
|
||||||
LrsContainer.__init__(self, [TextBlock, ImageBlock])
|
LrsContainer.__init__(self, [TextBlock, ImageBlock])
|
||||||
self.content = content
|
self.content = content
|
||||||
self.x1 = x
|
self.x1 = int(x)
|
||||||
self.y1 = y
|
self.y1 = int(y)
|
||||||
|
|
||||||
|
|
||||||
def appendReferencedObjects(self, parent):
|
def appendReferencedObjects(self, parent):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user