Fix handling of ignored tags in html2lrf

This commit is contained in:
Kovid Goyal 2007-04-26 17:36:18 +00:00
parent 44158dfc3c
commit 379a5f1a5a

View File

@@ -28,7 +28,7 @@ from tempfile import mkdtemp
from operator import itemgetter from operator import itemgetter
from libprs500.lrf.html.BeautifulSoup import BeautifulSoup, Comment, Tag, \ from libprs500.lrf.html.BeautifulSoup import BeautifulSoup, Comment, Tag, \
NavigableString, Declaration NavigableString, Declaration, ProcessingInstruction
from libprs500.lrf.pylrs.pylrs import Paragraph, CR, Italic, ImageStream, TextBlock, \ from libprs500.lrf.pylrs.pylrs import Paragraph, CR, Italic, ImageStream, TextBlock, \
ImageBlock, JumpButton, CharButton, \ ImageBlock, JumpButton, CharButton, \
Page, Bold, Space, Plot, TextStyle, Image Page, Bold, Space, Plot, TextStyle, Image
@@ -205,6 +205,7 @@ class Span(_Span):
class HTMLConverter(object): class HTMLConverter(object):
selector_pat = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}") selector_pat = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}")
IGNORED_TAGS = (Comment, Declaration, ProcessingInstruction)
class Link(object): class Link(object):
def __init__(self, para, tag): def __init__(self, para, tag):
@@ -457,7 +458,7 @@ class HTMLConverter(object):
def process_children(self, ptag, pcss): def process_children(self, ptag, pcss):
""" Process the children of ptag """ """ Process the children of ptag """
for c in ptag.contents: for c in ptag.contents:
if isinstance(c, (Comment, Declaration)): if isinstance(c, HTMLConverter.IGNORED_TAGS):
continue continue
elif isinstance(c, Tag): elif isinstance(c, Tag):
self.parse_tag(c, pcss) self.parse_tag(c, pcss)
@@ -526,6 +527,7 @@ class HTMLConverter(object):
try: try:
tagname = tag.name.lower() tagname = tag.name.lower()
except AttributeError: except AttributeError:
if not isinstance(tag, HTMLConverter.IGNORED_TAGS):
self.add_text(tag, parent_css) self.add_text(tag, parent_css)
return return
tag_css = self.tag_css(tag, parent_css=parent_css) tag_css = self.tag_css(tag, parent_css=parent_css)