Improved recipe for The Guardian

This commit is contained in:
Kovid Goyal 2009-08-19 10:24:43 -06:00
parent cf3582d450
commit cbee0d32bf

View File

@ -8,12 +8,11 @@ www.guardian.co.uk
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class Guardian(BasicNewsRecipe): class Guardian(BasicNewsRecipe):
title = u'The Guardian' title = u'The Guardian'
__author__ = 'Seabound' __author__ = 'Seabound and Sujata Raman'
language = _('English') language = _('English')
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 20 max_articles_per_feed = 20
@ -37,10 +36,10 @@ class Guardian(BasicNewsRecipe):
.h1{font-size: large ;font-family:georgia,serif; font-weight:bold;} .h1{font-size: large ;font-family:georgia,serif; font-weight:bold;}
.stand-first-alone{color:#666666; font-size:small; font-family:Arial,Helvetica,sans-serif;} .stand-first-alone{color:#666666; font-size:small; font-family:Arial,Helvetica,sans-serif;}
.caption{color:#666666; font-size:x-small; font-family:Arial,Helvetica,sans-serif;} .caption{color:#666666; font-size:x-small; font-family:Arial,Helvetica,sans-serif;}
#article-wrapper{font-size:small; font-family:Arial,Helvetica,sans-serif;} #article-wrapper{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
.main-article-info{font-family:Arial,Helvetica,sans-serif;} .main-article-info{font-family:Arial,Helvetica,sans-serif;}
#full-contents{font-size:small; font-family:Arial,Helvetica,sans-serif;} #full-contents{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
#match-stats-summary{font-size:small; font-family:Arial,Helvetica,sans-serif;} #match-stats-summary{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
''' '''
@ -57,6 +56,15 @@ class Guardian(BasicNewsRecipe):
('Comment','http://www.guardian.co.uk/commentisfree/rss'), ('Comment','http://www.guardian.co.uk/commentisfree/rss'),
] ]
def get_article_url(self, article):
url = article.get('guid', None)
if '/video/' in url or '/flyer/' in url or '/quiz/' in url or \
'/gallery/' in url or 'ivebeenthere' in url or \
'pickthescore' in url or 'audioslideshow' in url :
url = None
return url
def preprocess_html(self, soup): def preprocess_html(self, soup):