Fix #4032 (NYT won't complete)

This commit is contained in:
Kovid Goyal 2009-11-29 13:25:11 -07:00
parent 5b6c033c40
commit 152738b691
4 changed files with 10 additions and 6 deletions

View File

@@ -10,7 +10,7 @@ class IrishIndependent(BasicNewsRecipe):
title = u'Irish Independent'
description = 'Irish and World news from Irelands Bestselling Daily Broadsheet'
__author__ = 'Neil Grogan'
language = 'en_UK'
language = 'en_GB'
oldest_article = 7
max_articles_per_feed = 100
remove_tags_before = dict(id='article')

View File

@@ -14,7 +14,7 @@ class NYTimes(BasicNewsRecipe):
title = 'New York Times Top Stories'
__author__ = 'GRiker'
language = _('English')
language = 'en'
description = 'Top Stories from the New York Times'
# List of sections typically included in Top Stories. Use a keyword from the

View File

@@ -22,7 +22,10 @@ class NYTimes(BasicNewsRecipe):
remove_tags_before = dict(id='article')
remove_tags_after = dict(id='article')
remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool', 'nextArticleLink clearfix']}),
dict(id=['footer', 'toolsRight', 'articleInline', 'navigation', 'archive', 'side_search', 'blog_sidebar', 'side_tool', 'side_index']),
dict(id=['footer', 'toolsRight', 'articleInline',
'navigation', 'archive', 'side_search', 'blog_sidebar',
'side_tool', 'side_index',
'relatedArticles', 'relatedTopics', 'adxSponLink']),
dict(name=['script', 'noscript', 'style'])]
encoding = 'cp1252'
no_stylesheets = True

View File

@@ -850,6 +850,7 @@ class Manifest(object):
data = etree.fromstring(data)
except:
data = data.replace(':=', '=').replace(':>', '>')
data = data.replace('<http:/>', '')
try:
data = etree.fromstring(data)
except etree.XMLSyntaxError: