From 1cc8b55879f26a021525f995bd432b4ae4b547cd Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 12 Mar 2008 23:31:21 +0000 Subject: [PATCH] Fix CSS parsing bugs in html2lrf --- src/libprs500/ebooks/lrf/html/convert_from.py | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/libprs500/ebooks/lrf/html/convert_from.py b/src/libprs500/ebooks/lrf/html/convert_from.py index ae0cf8163c..dd53da807e 100644 --- a/src/libprs500/ebooks/lrf/html/convert_from.py +++ b/src/libprs500/ebooks/lrf/html/convert_from.py @@ -351,7 +351,6 @@ class HTMLConverter(object): self.logger.info('Written preprocessed HTML to '+dump.name) dump.close() - #print soup return soup def add_file(self, path): @@ -380,7 +379,8 @@ class HTMLConverter(object): self.target_prefix = path self.previous_text = '\n' self.tops[path] = self.parse_file(soup) - self.processed_files.append(path) + self.processed_files.append(path) + def parse_css(self, style): """ @@ -467,6 +467,8 @@ class HTMLConverter(object): prop.update(self.css[classname]) if self.pseudo_css.has_key(classname): pprop.update(self.pseudo_css[classname]) + if tag.has_key('id') and self.css.has_key(tag['id']): + prop.update(self.css[tag['id']]) if tag.has_key("style"): prop.update(self.parse_style_properties(tag["style"])) return prop, pprop @@ -1146,7 +1148,8 @@ class HTMLConverter(object): ans = 120 if ans is not None: ans += int(self.font_delta * 20) - ans = str(ans) + ans = str(ans) + return ans family, weight, style, variant = 'serif', 'normal', 'normal', None @@ -1216,7 +1219,7 @@ class HTMLConverter(object): result = int(val) except ValueError: pass - m = re.match("\s*(-*[0-9]*\.?[0-9]*)\s*(%|em|px|mm|cm|in|pt|pc)", val) + m = re.search(r"\s*(-*[0-9]*\.?[0-9]*)\s*(%|em|px|mm|cm|in|pt|pc)", val) if m is not None and m.group(1): unit = float(m.group(1)) if m.group(2) == '%': @@ -1424,11 +1427,10 @@ class HTMLConverter(object): elif tagname in ['style', 'link']: ncss, npcss = {}, {} if tagname == 'style': - for c in tag.contents: - if isinstance(c, NavigableString): - css, pcss = self.parse_css(str(c)) - ncss.update(css) - npcss.update(pcss) + text = ''.join([unicode(i) for i in tag.findAll(text=True)]) + css, pcss = self.parse_css(text) + ncss.update(css) + npcss.update(pcss) elif tag.has_key('type') and tag['type'] == "text/css" \ and tag.has_key('href'): path = munge_paths(self.target_prefix, tag['href'])[0]