Conversion: Do not error out when tags have unparseable color attributes. Fixes #1053583 (Problem while Fetching Indian Express news- repeating)

This commit is contained in:
Kovid Goyal 2012-09-21 09:17:27 +05:30
parent 8069522db7
commit bc669cb342
2 changed files with 6 additions and 3 deletions

View File

@@ -13,8 +13,10 @@ class IndianExpress(BasicNewsRecipe):
#remove_tags_after = dict(name='td', attrs={'class':'newptool1'}) #remove_tags_after = dict(name='td', attrs={'class':'newptool1'})
remove_tags = [ remove_tags = [
dict(name='iframe'), dict(name='iframe'),
dict(name='div', attrs={'class':['bookmarks_div', 'comment_box', 'bookmarks_div_bot', 'box']}), dict(name='div', attrs={'class':['pagination_new', 'comments_desc',
dict(name='div', attrs={'id':['footer', 'tab_innerhc', 'discussion', 'google_new']}), 'content_right', 'bookmarks_div', 'comment_box', 'bookmarks_div_bot', 'box']}),
dict(name='div', attrs={'id':['footer', 'tab_innerhc', 'discussion',
'google_new', 'header_new', 'slidebox']}),
dict(name='a', attrs={'class':'nobdr'}), dict(name='a', attrs={'class':'nobdr'}),
#dict(name='span', text=':'), #dict(name='span', text=':'),
] ]

View File

@@ -8,6 +8,7 @@ __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
import re, operator, math import re, operator, math
from collections import defaultdict from collections import defaultdict
from xml.dom import SyntaxErr
from lxml import etree from lxml import etree
import cssutils import cssutils
@@ -279,7 +280,7 @@ class CSSFlattener(object):
if 'color' in node.attrib: if 'color' in node.attrib:
try: try:
cssdict['color'] = Property('color', node.attrib['color']).value cssdict['color'] = Property('color', node.attrib['color']).value
except ValueError: except (ValueError, SyntaxErr):
pass pass
del node.attrib['color'] del node.attrib['color']
if 'bgcolor' in node.attrib: if 'bgcolor' in node.attrib: