Fix CSS parsing bugs in html2lrf

This commit is contained in:
Kovid Goyal 2008-03-12 23:31:21 +00:00
parent 32b5ebf861
commit 1cc8b55879

View File

@ -351,7 +351,6 @@ class HTMLConverter(object):
self.logger.info('Written preprocessed HTML to '+dump.name) self.logger.info('Written preprocessed HTML to '+dump.name)
dump.close() dump.close()
#print soup
return soup return soup
def add_file(self, path): def add_file(self, path):
@ -382,6 +381,7 @@ class HTMLConverter(object):
self.tops[path] = self.parse_file(soup) self.tops[path] = self.parse_file(soup)
self.processed_files.append(path) self.processed_files.append(path)
def parse_css(self, style): def parse_css(self, style):
""" """
Parse the contents of a <style> tag or .css file. Parse the contents of a <style> tag or .css file.
@ -467,6 +467,8 @@ class HTMLConverter(object):
prop.update(self.css[classname]) prop.update(self.css[classname])
if self.pseudo_css.has_key(classname): if self.pseudo_css.has_key(classname):
pprop.update(self.pseudo_css[classname]) pprop.update(self.pseudo_css[classname])
if tag.has_key('id') and self.css.has_key(tag['id']):
prop.update(self.css[tag['id']])
if tag.has_key("style"): if tag.has_key("style"):
prop.update(self.parse_style_properties(tag["style"])) prop.update(self.parse_style_properties(tag["style"]))
return prop, pprop return prop, pprop
@ -1147,6 +1149,7 @@ class HTMLConverter(object):
if ans is not None: if ans is not None:
ans += int(self.font_delta * 20) ans += int(self.font_delta * 20)
ans = str(ans) ans = str(ans)
return ans return ans
family, weight, style, variant = 'serif', 'normal', 'normal', None family, weight, style, variant = 'serif', 'normal', 'normal', None
@ -1216,7 +1219,7 @@ class HTMLConverter(object):
result = int(val) result = int(val)
except ValueError: except ValueError:
pass pass
m = re.match("\s*(-*[0-9]*\.?[0-9]*)\s*(%|em|px|mm|cm|in|pt|pc)", val) m = re.search(r"\s*(-*[0-9]*\.?[0-9]*)\s*(%|em|px|mm|cm|in|pt|pc)", val)
if m is not None and m.group(1): if m is not None and m.group(1):
unit = float(m.group(1)) unit = float(m.group(1))
if m.group(2) == '%': if m.group(2) == '%':
@ -1424,11 +1427,10 @@ class HTMLConverter(object):
elif tagname in ['style', 'link']: elif tagname in ['style', 'link']:
ncss, npcss = {}, {} ncss, npcss = {}, {}
if tagname == 'style': if tagname == 'style':
for c in tag.contents: text = ''.join([unicode(i) for i in tag.findAll(text=True)])
if isinstance(c, NavigableString): css, pcss = self.parse_css(text)
css, pcss = self.parse_css(str(c)) ncss.update(css)
ncss.update(css) npcss.update(pcss)
npcss.update(pcss)
elif tag.has_key('type') and tag['type'] == "text/css" \ elif tag.has_key('type') and tag['type'] == "text/css" \
and tag.has_key('href'): and tag.has_key('href'):
path = munge_paths(self.target_prefix, tag['href'])[0] path = munge_paths(self.target_prefix, tag['href'])[0]