Improve handling of images referenced in <a> tags and place large images in an image block

This commit is contained in:
Kovid Goyal 2007-05-18 17:57:00 +00:00
parent 44a50922cd
commit 05c1c36719
3 changed files with 111 additions and 88 deletions

View File

@ -36,7 +36,7 @@ from libprs500.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, \
Comment, Tag, NavigableString, Declaration, ProcessingInstruction Comment, Tag, NavigableString, Declaration, ProcessingInstruction
from libprs500.ebooks.lrf.pylrs.pylrs import Paragraph, CR, Italic, ImageStream, \ from libprs500.ebooks.lrf.pylrs.pylrs import Paragraph, CR, Italic, ImageStream, \
TextBlock, ImageBlock, JumpButton, CharButton, Bold, Space, \ TextBlock, ImageBlock, JumpButton, CharButton, Bold, Space, \
Plot, Image, BlockSpace, RuledLine, BookSetting Plot, Image, BlockSpace, RuledLine, BookSetting, Canvas
from libprs500.ebooks.lrf.pylrs.pylrs import Span as _Span from libprs500.ebooks.lrf.pylrs.pylrs import Span as _Span
from libprs500.ebooks.lrf import ConversionError, option_parser, Book, PRS500_PROFILE from libprs500.ebooks.lrf import ConversionError, option_parser, Book, PRS500_PROFILE
from libprs500 import extract, filename_to_utf8 from libprs500 import extract, filename_to_utf8
@ -217,8 +217,10 @@ class HTMLConverter(object):
PAGE_BREAK_PAT = re.compile(r'page-break-(?:after|before)\s*:\s*(\w+)', re.IGNORECASE) PAGE_BREAK_PAT = re.compile(r'page-break-(?:after|before)\s*:\s*(\w+)', re.IGNORECASE)
IGNORED_TAGS = (Comment, Declaration, ProcessingInstruction) IGNORED_TAGS = (Comment, Declaration, ProcessingInstruction)
# Fix <a /> elements # Fix <a /> elements
MARKUP_MASSAGE = [(re.compile("(<\s*[aA]\s+.*\/)\s*>"), MARKUP_MASSAGE = [(re.compile("(<\s*[aA]\s+.*\/)\s*>"), #Close <a /> tags
lambda match: match.group(1)+"></a>"), lambda match: match.group(1)+"></a>"),
# Strip comments from <style> tags. This is needed as
# sometimes there are unterminated comments
(re.compile(r"<\s*style.*?>.*?(<\!--).*?<.\s*style\s*>", re.DOTALL|re.IGNORECASE), (re.compile(r"<\s*style.*?>.*?(<\!--).*?<.\s*style\s*>", re.DOTALL|re.IGNORECASE),
lambda match: match.group().replace('<!--', '').replace('-->', '')), lambda match: match.group().replace('<!--', '').replace('-->', '')),
] ]
@ -442,22 +444,20 @@ class HTMLConverter(object):
if not self.top.parent: if not self.top.parent:
if not previous: if not previous:
try: try:
previous = get_valid_block(self.book.pages()[0]) previous = self.book.pages()[0]
except IndexError: except IndexError:
previous = self.current_page raise ConversionError, self.file_name + ' does not seem to have any content'
else:
found = False found = False
for page in self.book.pages(): for page in self.book.pages():
if page == previous: if page == previous:
found = True found = True
continue
if found:
self.top = get_valid_block(page)
if not self.top:
continue continue
if found: break
self.top = get_valid_block(page)
if not self.top:
continue
break
if not self.top.parent:
self.top = get_valid_block(self.current_page)
if not self.top or not self.top.parent: if not self.top or not self.top.parent:
raise ConversionError, 'Could not parse ' + self.file_name raise ConversionError, 'Could not parse ' + self.file_name
@ -622,7 +622,8 @@ class HTMLConverter(object):
textheight=self.profile.screen_height) textheight=self.profile.screen_height)
if not self.images.has_key(path): if not self.images.has_key(path):
self.images[path] = ImageStream(path) self.images[path] = ImageStream(path)
page.append(ImageBlock(self.images[path])) ib = ImageBlock(self.images[path])
page.append(ib)
self.book.append(page) self.book.append(page)
def process_children(self, ptag, pcss): def process_children(self, ptag, pcss):
@ -725,6 +726,69 @@ class HTMLConverter(object):
self.current_block = self.book.create_text_block(textStyle=self.current_block.textStyle, self.current_block = self.book.create_text_block(textStyle=self.current_block.textStyle,
blockStyle=self.current_block.blockStyle) blockStyle=self.current_block.blockStyle)
def process_image(self, path, tag_css, width=None, height=None):
    '''
    Add the image at C{path} to the book, shrinking it if it is larger
    than the page described by C{self.profile}.

    Where the image ends up depends on its final size:
      - largest side no more than a fifth of the smaller page dimension:
        appended as a Plot to the current paragraph (inline)
      - height no more than two thirds of the page height: a Plot in a
        paragraph/text block of its own
      - anything taller: a new page is started and the image is put on a
        Canvas as an ImageBlock, centred horizontally

    @param path: Path of the image file. If a scaled copy was recorded
                 earlier in C{self.scaled_images}, that copy is used.
    @param tag_css: CSS of the originating tag, handed to
                    C{self.process_alignment}.
    @param width: Requested width, or None to use the size of the image.
    @param height: Requested height, or None to use the size of the image.
                   If either of width/height is None both are taken from
                   the image.
    '''
    def scale_image(width, height):
        # Save a resized RGB JPEG copy in a persistent temporary file and
        # record it under the value path has at the time of the call.
        pt = PersistentTemporaryFile(suffix='.jpeg')
        resized = source.resize((int(width), int(height)), PILImage.ANTIALIAS)
        resized.convert('RGB').save(pt, 'JPEG')
        pt.close()
        self.scaled_images[path] = pt
        return pt.name

    if path in self.scaled_images:
        path = self.scaled_images[path].name

    # Kept under its own name: scale_image reads it, and the pylrs Image
    # created further down must not shadow it.
    source = PILImage.open(path)
    if width is None or height is None:
        width, height = source.size

    page_width = self.profile.page_width
    page_height = self.profile.page_height

    # Too tall: fit to the page height first, then to the page width if the
    # result is still too wide.
    if height > page_height:
        corrf = page_height/(1.*height)
        width, height = floor(corrf*width), page_height-1
        if width > page_width:
            corrf = page_width/(1.*width)
            width, height = page_width-1, floor(corrf*height)
        path = scale_image(width, height)
    # Too wide: the mirror image of the case above.
    if width > page_width:
        corrf = page_width/(1.*width)
        width, height = page_width-1, floor(corrf*height)
        if height > page_height:
            corrf = page_height/(1.*height)
            width, height = floor(corrf*width), page_height-1
        path = scale_image(width, height)
    width, height = int(width), int(height)

    if path not in self.images:
        self.images[path] = ImageStream(path)
    image = Image(self.images[path], x0=0, y0=0, x1=width, y1=height,
                  xsize=width, ysize=height)

    # NOTE(review): presumably converts image dimensions into the units
    # Plot expects (720 per inch) -- confirm against pylrs.
    factor = 720./self.profile.dpi

    self.process_alignment(tag_css)

    if max(width, height) <= min(page_width, page_height)/5.:
        # Small image: keep it inline in the current paragraph.
        self.current_para.append(Plot(image, xsize=ceil(width*factor),
                                      ysize=ceil(height*factor)))
    elif height <= int(floor((2/3.)*page_height)):
        # Medium image: give it a paragraph and text block of its own, then
        # continue with a fresh block that reuses the previous styles.
        pb = self.current_block
        self.end_current_para()
        self.process_alignment(tag_css)
        self.current_para.append(Plot(image, xsize=width*factor,
                                      ysize=height*factor))
        self.current_block.append(self.current_para)
        self.current_page.append(self.current_block)
        self.current_block = self.book.create_text_block(
                                        textStyle=pb.textStyle,
                                        blockStyle=pb.blockStyle)
        self.current_para = Paragraph()
    else:
        # Large image: start a new page and centre the image on a canvas.
        self.end_page()
        canvas = Canvas(width=page_width, height=height)
        self.current_page.append(canvas)
        left = int(floor((page_width - width)/2.))
        # Use the canvas we just created instead of current_page.contents[0],
        # which is only the canvas when the page happens to be empty.
        canvas.put_object(ImageBlock(self.images[path]), left, 0)
def parse_tag(self, tag, parent_css): def parse_tag(self, tag, parent_css):
try: try:
tagname = tag.name.lower() tagname = tag.name.lower()
@ -798,73 +862,21 @@ class HTMLConverter(object):
path = unquote(purl[2]) path = unquote(purl[2])
if path and os.path.splitext(path)[1][1:].lower() in \ if path and os.path.splitext(path)[1][1:].lower() in \
['png', 'jpg', 'bmp', 'jpeg']: ['png', 'jpg', 'bmp', 'jpeg']:
self.add_image_page(path) self.process_image(path, tag_css)
else: else:
self.add_text('Link: ' + tag['href'], tag_css) self.add_text('Link: ' + tag['href'], tag_css)
self.links.append(HTMLConverter.Link(self.current_para.contents[-1], tag)) self.links.append(HTMLConverter.Link(self.current_para.contents[-1], tag))
elif tagname == 'img': elif tagname == 'img':
if tag.has_key('src') and os.access(unquote(tag['src']), os.R_OK): if tag.has_key('src') and os.access(unquote(tag['src']), os.R_OK):
path = os.path.abspath(unquote(tag['src'])) path = os.path.abspath(unquote(tag['src']))
if self.scaled_images.has_key(path): width, height = None, None
path = self.scaled_images[path].name
im = PILImage.open(path)
width, height = im.size
try: try:
width = int(tag['width']) width = int(tag['width'])
height = int(tag['height']) height = int(tag['height'])
except: except:
pass pass
self.process_image(path, tag_css, width, height)
def scale_image(width, height):
pt = PersistentTemporaryFile(suffix='.jpeg')
im.resize((int(width), int(height)), PILImage.ANTIALIAS).convert('RGB').save(pt, 'JPEG')
pt.close()
self.scaled_images[path] = pt
return pt.name
if height > self.profile.page_height:
corrf = self.profile.page_height/(1.*height)
width, height = floor(corrf*width), self.profile.page_height-1
if width > self.profile.page_width:
corrf = (self.profile.page_width)/(1.*width)
width, height = self.profile.page_width-1, floor(corrf*height)
path = scale_image(width, height)
if width > self.profile.page_width:
corrf = self.profile.page_width/(1.*width)
width, height = self.profile.page_width-1, floor(corrf*height)
if height > self.profile.page_height:
corrf = (self.profile.page_height)/(1.*height)
width, height = floor(corrf*width), self.profile.page_height-1
path = scale_image(width, height)
width, height = int(width), int(height)
if not self.images.has_key(path):
self.images[path] = ImageStream(path)
factor = 720./self.profile.dpi
self.process_alignment(tag_css)
if max(width, height) <= min(self.profile.page_width,
self.profile.page_height)/5.:
im = Image(self.images[path], x0=0, y0=0, x1=width, y1=height,\
xsize=width, ysize=height)
self.current_para.append(Plot(im, xsize=ceil(width*factor),
ysize=ceil(height*factor)))
else:
pb = self.current_block
self.end_current_para()
self.process_alignment(tag_css)
im = Image(self.images[path], x0=0, y0=0, x1=width, y1=height,\
xsize=width, ysize=height)
self.current_para.append(Plot(im, xsize=width*factor,
ysize=height*factor))
self.current_block.append(self.current_para)
self.current_page.append(self.current_block)
self.current_block = self.book.create_text_block(
textStyle=pb.textStyle,
blockStyle=pb.blockStyle)
self.current_para = Paragraph()
else: else:
print >>sys.stderr, "Failed to process:", tag print >>sys.stderr, "Failed to process:", tag
elif tagname in ['style', 'link']: elif tagname in ['style', 'link']:
@ -998,6 +1010,9 @@ class HTMLConverter(object):
elif tagname in ['br', 'tr']: elif tagname in ['br', 'tr']:
self.current_para.append(CR()) self.current_para.append(CR())
self.process_children(tag, tag_css) self.process_children(tag, tag_css)
elif tagname in ['td']:
self.current_para.append(' ')
self.process_children(tag, tag_css)
elif tagname == 'hr': elif tagname == 'hr':
self.end_current_para() self.end_current_para()
self.current_block.append(CR()) self.current_block.append(CR())
@ -1208,6 +1223,9 @@ def main():
try: try:
options, args, parser = parse_options() options, args, parser = parse_options()
src = args[0] src = args[0]
if options.verbose:
import warnings
warnings.defaultaction = 'error'
except: except:
sys.exit(1) sys.exit(1)
process_file(src, options) process_file(src, options)

View File

@ -70,7 +70,7 @@
<h2><a name='images'>Inline images</a></h2> <h2><a name='images'>Inline images</a></h2>
<p> <p>
Here I demonstrate the use of inline images in the midst of text. Here is a small image <img src='small.jpg' /> embedded in a sentence. Now we have a slightly larger image that is automatically put in its own block <img style="text-align:center" src='medium.jpg' /> and finally we have a large image which wont fit on this page. Try changing sizes from S to M to L and see how the images behave. <img align='center' src='large.jpg' /> Here I demonstrate the use of inline images in the midst of text. Here is a small image <img src='small.jpg' /> embedded in a sentence. Now we have a slightly larger image that is automatically put in its own block <img style="text-align:center" src='medium.jpg' /> and finally we have a large image which is put on a page by itself. Try changing sizes from S to M to L and see how the images behave. <img align='center' src='large.jpg' />
</p> </p>
<p class='toc'> <p class='toc'>
<hr /> <hr />

View File

@ -144,7 +144,7 @@ class Delegator(object):
d.parent = self d.parent = self
methods = d.getMethods() methods = d.getMethods()
self.delegatedMethods += methods self.delegatedMethods += methods
for m in methods: for m in methods:
setattr(self, m, getattr(d, m)) setattr(self, m, getattr(d, m))
""" """
@ -247,6 +247,7 @@ class LrsContainer(object):
self.parent = None self.parent = None
self.contents = [] self.contents = []
self.validChildren = validChildren self.validChildren = validChildren
self.must_append = False
def has_text(self): def has_text(self):
@ -259,7 +260,7 @@ class LrsContainer(object):
if child.has_text(): if child.has_text():
return True return True
for item in self.contents: for item in self.contents:
if isinstance(item, (Plot, ImageBlock)): if isinstance(item, (Plot, ImageBlock, Canvas)):
return True return True
return False return False
@ -268,7 +269,7 @@ class LrsContainer(object):
Append self to C{parent} iff self has non whitespace textual content Append self to C{parent} iff self has non whitespace textual content
@type parent: LrsContainer @type parent: LrsContainer
''' '''
if self.has_text(): if self.has_text() or self.must_append:
parent.append(self) parent.append(self)
@ -425,7 +426,7 @@ class Book(Delegator):
LrsObject.nextObjId += 1 LrsObject.nextObjId += 1
Delegator.__init__(self, [BookInformation(), Main(), Delegator.__init__(self, [BookInformation(), Main(),
Template(), Style(), Solos(), Objects()]) Template(), Style(), Solos(), Objects()])
self.sourceencoding = None self.sourceencoding = None
@ -506,7 +507,7 @@ class Book(Delegator):
className = content.__class__.__name__ className = content.__class__.__name__
try: try:
method = getattr(self, "append" + className) method = getattr(self, "append" + className)
except AttributeError: except AttributeError:
raise LrsError, "can't append %s to Book" % className raise LrsError, "can't append %s to Book" % className
method(content) method(content)
@ -1870,24 +1871,28 @@ class CharButton(LrsSimpleChar1, LrsContainer):
class Objects(LrsContainer): class Objects(LrsContainer):
def __init__(self): def __init__(self):
LrsContainer.__init__(self, [JumpButton, TextBlock, HeaderOrFooter, LrsContainer.__init__(self, [JumpButton, TextBlock, HeaderOrFooter,
ImageStream, Image]) ImageStream, Image, ImageBlock])
self.appendJumpButton = self.appendTextBlock = self.appendHeader = \ self.appendJumpButton = self.appendTextBlock = self.appendHeader = \
self.appendFooter = self.appendImageStream = \ self.appendFooter = self.appendImageStream = \
self.appendImage = self.append self.appendImage = self.appendImageBlock = self.append
def getMethods(self): def getMethods(self):
return ["JumpButton", "appendJumpButton", "TextBlock", return ["JumpButton", "appendJumpButton", "TextBlock",
"appendTextBlock", "Header", "appendHeader", "appendTextBlock", "Header", "appendHeader",
"Footer", "appendFooter", "Footer", "appendFooter", "ImageBlock",
"ImageStream", "appendImageStream", "ImageStream", "appendImageStream",
'Image','appendImage'] 'Image','appendImage', 'appendImageBlock']
def getSettings(self): def getSettings(self):
return [] return []
def ImageBlock(self, *args, **kwargs):
    '''
    Create an ImageBlock from the supplied arguments, append it to this
    container and return it.
    '''
    # Inside the method body the name ImageBlock resolves to the
    # module-level class, not to this method.
    block = ImageBlock(*args, **kwargs)
    self.append(block)
    return block
def JumpButton(self, textBlock): def JumpButton(self, textBlock):
b = JumpButton(textBlock) b = JumpButton(textBlock)
@ -2062,8 +2067,8 @@ class Canvas(LrsObject, LrsContainer, LrsAttributes):
self.settings = self.defaults.copy() self.settings = self.defaults.copy()
self.settings.update(settings) self.settings.update(settings)
self.settings['canvasheight'] = height self.settings['canvasheight'] = int(height)
self.settings['canvaswidth'] = width self.settings['canvaswidth'] = int(width)
def put_object(self, obj, x, y): def put_object(self, obj, x, y):
self.append(PutObj(obj, x=x, y=y)) self.append(PutObj(obj, x=x, y=y))
@ -2086,7 +2091,7 @@ class Canvas(LrsObject, LrsContainer, LrsAttributes):
content.toLrfContainer(lrfWriter, stream) content.toLrfContainer(lrfWriter, stream)
if lrfWriter.saveStreamTags: # true only if testing if lrfWriter.saveStreamTags: # true only if testing
c.saveStreamTags = stream.tags c.saveStreamTags = stream.tags
c.appendLrfTags( c.appendLrfTags(
stream.getStreamTags(lrfWriter.getSourceEncoding(), stream.getStreamTags(lrfWriter.getSourceEncoding(),
optimizeTags=lrfWriter.optimizeTags, optimizeTags=lrfWriter.optimizeTags,
@ -2102,8 +2107,8 @@ class PutObj(LrsContainer):
def __init__(self, content, x=0, y=0): def __init__(self, content, x=0, y=0):
LrsContainer.__init__(self, [TextBlock, ImageBlock]) LrsContainer.__init__(self, [TextBlock, ImageBlock])
self.content = content self.content = content
self.x1 = x self.x1 = int(x)
self.y1 = y self.y1 = int(y)
def appendReferencedObjects(self, parent): def appendReferencedObjects(self, parent):