diff --git a/src/libprs500/lrf/html/convert_from.py b/src/libprs500/lrf/html/convert_from.py
index 5aacd49820..9d8feecfd1 100644
--- a/src/libprs500/lrf/html/convert_from.py
+++ b/src/libprs500/lrf/html/convert_from.py
@@ -20,14 +20,16 @@ Code to convert HTML ebooks into LRF ebooks.
import os, re, sys
from htmlentitydefs import name2codepoint
from optparse import OptionParser
+from urllib import urlopen
from libprs500.lrf.html.BeautifulSoup import BeautifulSoup, Comment, Tag, NavigableString
-from libprs500.lrf.pylrs.pylrs import Book, Page, Paragraph, TextBlock, CR, Italic
+from libprs500.lrf.pylrs.pylrs import Book, Page, Paragraph, TextBlock, \
+ CR, Italic, ImageStream, ImageBlock
from libprs500.lrf.pylrs.pylrs import Span as _Span
from libprs500.lrf import ConversionError
class Span(_Span):
- replaced_entities = [ 'amp', 'lt', 'gt' , 'ldquo', 'rdquo', 'lsquo', 'rsquo' ]
+ replaced_entities = [ 'amp', 'lt', 'gt' , 'ldquo', 'rdquo', 'lsquo', 'rsquo', 'nbsp' ]
patterns = [ re.compile('&'+i+';') for i in replaced_entities ]
targets = [ unichr(name2codepoint[i]) for i in replaced_entities ]
rules = zip(patterns, targets)
@@ -283,8 +285,8 @@ class HTMLConvertor(object):
for key in css.keys():
test = key.lower()
if test.startswith('margin') or 'indent' in test or \
- 'padding' in test or 'border' in test or test in \
- ['color', 'display', 'text-decoration', 'letter-spacing']:
+ 'padding' in test or 'border' in test or 'page-break' in test \
+ or test in ['color', 'display', 'text-decoration', 'letter-spacing']:
css.pop(key)
return css
@@ -321,9 +323,29 @@ class HTMLConvertor(object):
return
if tagname in ["title", "script", "meta"]:
pass
+ elif tagname == 'img':
+ if tag.has_key('src'):
+ if os.access(tag['src'], os.R_OK):
+ self.current_block.append(self.current_para)
+ self.current_page.append(self.current_block)
+ ib = ImageBlock(ImageStream(tag['src']))
+ self.current_page.append(ib)
+ self.current_block = TextBlock()
+ self.current_para = Paragraph()
elif tagname in ['style', 'link']:
- # TODO: Append CSS to self.css
- pass
+ if tagname == 'style':
+ for c in tag.contents:
+ if isinstance(c,NavigableString):
+ self.css.update(self.parse_css(str(c)))
+ elif tag.has_key('type') and tag['type'] == "text/css" \
+ and tag.has_key('href'):
+ url = tag['href']
+ if url.startswith('http://'):
+ f = urlopen(url)
+ else:
+ f = open(url)
+ self.css.update(f.read())
+ f.close()
elif tagname == 'p':
css = self.tag_css(tag, parent_css=parent_css)
indent = css.pop('text-indent', '')