Added support for <img>, <link> and <style> tags to html2lrf

2025-07-09 03:04:10 -04:00 · 2007-04-19 03:13:02 +00:00 · 2007-04-19 03:13:02 +00:00 · 22872ee668
commit 22872ee668
parent d69fad53f4
1 changed files with 28 additions and 6 deletions
--- a/src/libprs500/lrf/html/convert_from.py
+++ b/src/libprs500/lrf/html/convert_from.py
@ -20,14 +20,16 @@ Code to convert HTML ebooks into LRF ebooks.
 import os, re, sys
 from htmlentitydefs import name2codepoint
 from optparse import OptionParser
 from urllib import urlopen
 from libprs500.lrf.html.BeautifulSoup import BeautifulSoup, Comment, Tag, NavigableString
-from libprs500.lrf.pylrs.pylrs import Book, Page, Paragraph, TextBlock, CR, Italic
+from libprs500.lrf.pylrs.pylrs import Book, Page, Paragraph, TextBlock, \
                                      CR, Italic, ImageStream, ImageBlock
 from libprs500.lrf.pylrs.pylrs import Span as _Span
 from libprs500.lrf import ConversionError
 class Span(_Span):
-    replaced_entities = [ 'amp', 'lt', 'gt' , 'ldquo', 'rdquo', 'lsquo', 'rsquo' ]
+    replaced_entities = [ 'amp', 'lt', 'gt' , 'ldquo', 'rdquo', 'lsquo', 'rsquo', 'nbsp' ]
    patterns = [ re.compile('&'+i+';') for i in replaced_entities ]
    targets  = [ unichr(name2codepoint[i]) for i in replaced_entities ]
    rules = zip(patterns, targets)
@ -283,8 +285,8 @@ class HTMLConvertor(object):
            for key in css.keys():
                test = key.lower()
                if test.startswith('margin') or 'indent' in test or \
-                   'padding' in test or 'border' in test or test in \
+                   'padding' in test or 'border' in test or 'page-break' in test \
-                   ['color', 'display', 'text-decoration', 'letter-spacing']:
+                   or test in ['color', 'display', 'text-decoration', 'letter-spacing']:
                    css.pop(key)
            return css
@ -321,9 +323,29 @@ class HTMLConvertor(object):
            return
        if tagname in ["title", "script", "meta"]:
            pass
        elif tagname == 'img':
            if tag.has_key('src'):
                if os.access(tag['src'], os.R_OK):
                    self.current_block.append(self.current_para)
                    self.current_page.append(self.current_block)
                    ib = ImageBlock(ImageStream(tag['src']))
                    self.current_page.append(ib)
                    self.current_block = TextBlock()
                    self.current_para  = Paragraph()                                                    
        elif tagname in ['style', 'link']:
-            # TODO: Append CSS to self.css
+            if tagname == 'style':
-            pass
+                for c in tag.contents:
                    if isinstance(c,NavigableString):
                        self.css.update(self.parse_css(str(c)))
            elif tag.has_key('type') and tag['type'] == "text/css" \
                    and tag.has_key('href'):
                url = tag['href']
                if url.startswith('http://'):
                    f = urlopen(url)
                else:
                    f = open(url)
                self.css.update(f.read())
                f.close()
        elif tagname == 'p':
            css = self.tag_css(tag, parent_css=parent_css)
            indent = css.pop('text-indent', '')