From 379a5f1a5a9e43388d1e30c6f98024d18ea08449 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 26 Apr 2007 17:36:18 +0000 Subject: [PATCH] Fix handling of ignored tags in html2lrf --- src/libprs500/lrf/html/convert_from.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/libprs500/lrf/html/convert_from.py b/src/libprs500/lrf/html/convert_from.py index f251d3d0f7..2712b6fb00 100644 --- a/src/libprs500/lrf/html/convert_from.py +++ b/src/libprs500/lrf/html/convert_from.py @@ -28,7 +28,7 @@ from tempfile import mkdtemp from operator import itemgetter from libprs500.lrf.html.BeautifulSoup import BeautifulSoup, Comment, Tag, \ - NavigableString, Declaration + NavigableString, Declaration, ProcessingInstruction from libprs500.lrf.pylrs.pylrs import Paragraph, CR, Italic, ImageStream, TextBlock, \ ImageBlock, JumpButton, CharButton, \ Page, Bold, Space, Plot, TextStyle, Image @@ -204,7 +204,8 @@ class Span(_Span): class HTMLConverter(object): - selector_pat = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}") + selector_pat = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}") + IGNORED_TAGS = (Comment, Declaration, ProcessingInstruction) class Link(object): def __init__(self, para, tag): @@ -457,7 +458,7 @@ class HTMLConverter(object): def process_children(self, ptag, pcss): """ Process the children of ptag """ for c in ptag.contents: - if isinstance(c, (Comment, Declaration)): + if isinstance(c, HTMLConverter.IGNORED_TAGS): continue elif isinstance(c, Tag): self.parse_tag(c, pcss) @@ -526,7 +527,8 @@ class HTMLConverter(object): try: tagname = tag.name.lower() except AttributeError: - self.add_text(tag, parent_css) + if not isinstance(tag, HTMLConverter.IGNORED_TAGS): + self.add_text(tag, parent_css) return tag_css = self.tag_css(tag, parent_css=parent_css) try: # Skip element if its display attribute is set to none