From 05c1c36719e25da2dc10b75222a3a6a42bcf5540 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 18 May 2007 17:57:00 +0000 Subject: [PATCH] Improve handling of images referenced in tags and place large images in an image block --- src/libprs500/ebooks/lrf/html/convert_from.py | 164 ++++++++++-------- src/libprs500/ebooks/lrf/html/demo/demo.html | 2 +- src/libprs500/ebooks/lrf/pylrs/pylrs.py | 33 ++-- 3 files changed, 111 insertions(+), 88 deletions(-) diff --git a/src/libprs500/ebooks/lrf/html/convert_from.py b/src/libprs500/ebooks/lrf/html/convert_from.py index 1ce17eb6ba..272a9dbf70 100644 --- a/src/libprs500/ebooks/lrf/html/convert_from.py +++ b/src/libprs500/ebooks/lrf/html/convert_from.py @@ -36,7 +36,7 @@ from libprs500.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, \ Comment, Tag, NavigableString, Declaration, ProcessingInstruction from libprs500.ebooks.lrf.pylrs.pylrs import Paragraph, CR, Italic, ImageStream, \ TextBlock, ImageBlock, JumpButton, CharButton, Bold, Space, \ - Plot, Image, BlockSpace, RuledLine, BookSetting + Plot, Image, BlockSpace, RuledLine, BookSetting, Canvas from libprs500.ebooks.lrf.pylrs.pylrs import Span as _Span from libprs500.ebooks.lrf import ConversionError, option_parser, Book, PRS500_PROFILE from libprs500 import extract, filename_to_utf8 @@ -217,8 +217,10 @@ class HTMLConverter(object): PAGE_BREAK_PAT = re.compile(r'page-break-(?:after|before)\s*:\s*(\w+)', re.IGNORECASE) IGNORED_TAGS = (Comment, Declaration, ProcessingInstruction) # Fix elements - MARKUP_MASSAGE = [(re.compile("(<\s*[aA]\s+.*\/)\s*>"), + MARKUP_MASSAGE = [(re.compile("(<\s*[aA]\s+.*\/)\s*>"), #Close tags lambda match: match.group(1)+">"), + # Strip comments from