Fix CSS parsing bugs in html2lrf

2025-07-09 03:04:10 -04:00 · 2008-03-12 23:31:21 +00:00 · 2008-03-12 23:31:21 +00:00 · 1cc8b55879
commit 1cc8b55879
parent 32b5ebf861
1 changed files with 11 additions and 9 deletions
--- a/src/libprs500/ebooks/lrf/html/convert_from.py
+++ b/src/libprs500/ebooks/lrf/html/convert_from.py
@@ -351,7 +351,6 @@ class HTMLConverter(object):
            self.logger.info('Written preprocessed HTML to '+dump.name)
            dump.close()
        #print soup
        return soup
    def add_file(self, path):
@@ -382,6 +381,7 @@ class HTMLConverter(object):
        self.tops[path] = self.parse_file(soup)
        self.processed_files.append(path)        
    def parse_css(self, style):
        """
        Parse the contents of a <style> tag or .css file.
@@ -467,6 +467,8 @@ class HTMLConverter(object):
                        prop.update(self.css[classname])
                    if self.pseudo_css.has_key(classname):
                        pprop.update(self.pseudo_css[classname])
        if tag.has_key('id') and self.css.has_key(tag['id']):
            prop.update(self.css[tag['id']])
        if tag.has_key("style"):
            prop.update(self.parse_style_properties(tag["style"]))
        return prop, pprop
@@ -1147,6 +1149,7 @@ class HTMLConverter(object):
            if ans is not None: 
                ans += int(self.font_delta * 20)
                ans = str(ans)
            return ans
        family, weight, style, variant = 'serif', 'normal', 'normal', None
@@ -1216,7 +1219,7 @@ class HTMLConverter(object):
            result = int(val)
        except ValueError:
            pass
-        m = re.match("\s*(-*[0-9]*\.?[0-9]*)\s*(%|em|px|mm|cm|in|pt|pc)", val)
+        m = re.search(r"\s*(-*[0-9]*\.?[0-9]*)\s*(%|em|px|mm|cm|in|pt|pc)", val)
        if m is not None and m.group(1):
            unit = float(m.group(1))
            if m.group(2) == '%':
@@ -1424,11 +1427,10 @@ class HTMLConverter(object):
            elif tagname in ['style', 'link']:
                ncss, npcss = {}, {}
                if tagname == 'style':
-                    for c in tag.contents:
+                    text = ''.join([unicode(i) for i in tag.findAll(text=True)])
-                        if isinstance(c, NavigableString):
+                    css, pcss = self.parse_css(text)
-                            css, pcss = self.parse_css(str(c))
+                    ncss.update(css)
-                            ncss.update(css)
+                    npcss.update(pcss)
-                            npcss.update(pcss)
                elif tag.has_key('type') and tag['type'] == "text/css" \
                        and tag.has_key('href'):
                    path = munge_paths(self.target_prefix, tag['href'])[0]