Fix #4194 (Calibre 0.6.27 Guardian Recipe not working)

This commit is contained in:
Kovid Goyal 2009-12-13 12:44:53 -07:00
parent a436d54952
commit df45c88ac4

View File

@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en'
''' '''
www.guardian.co.uk www.guardian.co.uk
''' '''
from calibre import strftime #from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Guardian(BasicNewsRecipe): class Guardian(BasicNewsRecipe):
@ -15,8 +15,8 @@ class Guardian(BasicNewsRecipe):
__author__ = 'Seabound and Sujata Raman' __author__ = 'Seabound and Sujata Raman'
language = 'en_GB' language = 'en_GB'
#oldest_article = 7 oldest_article = 7
#max_articles_per_feed = 100 max_articles_per_feed = 25
remove_javascript = True remove_javascript = True
timefmt = ' [%a, %d %b %Y]' timefmt = ' [%a, %d %b %Y]'
@ -43,6 +43,41 @@ class Guardian(BasicNewsRecipe):
#match-stats-summary{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;} #match-stats-summary{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
''' '''
# Feed list: each entry is a (section title, RSS feed URL) pair.
feeds = [
('Front Page', 'http://www.guardian.co.uk/rss'),
('Business', 'http://www.guardian.co.uk/business/rss'),
('Sport', 'http://www.guardian.co.uk/sport/rss'),
('Culture', 'http://www.guardian.co.uk/culture/rss'),
('Money', 'http://www.guardian.co.uk/money/rss'),
('Life & Style', 'http://www.guardian.co.uk/lifeandstyle/rss'),
('Travel', 'http://www.guardian.co.uk/travel/rss'),
('Environment', 'http://www.guardian.co.uk/environment/rss'),
('Comment','http://www.guardian.co.uk/commentisfree/rss'),
]
def get_article_url(self, article):
    """Return the URL for a feed entry, or None if it should not be used.

    The entry's 'guid' value is taken as the article URL. None is returned
    when the entry has no guid, or when the URL contains one of the markers
    listed below (video, flyer, quiz, gallery and similar pages).

    :param article: feed entry exposing a dict-style ``get`` method.
    :return: the guid URL, or None.
    """
    # NOTE(review): returning None presumably makes the recipe framework
    # skip the entry -- confirm against the BasicNewsRecipe documentation.
    skipped_markers = (
        '/video/', '/flyer/', '/quiz/', '/gallery/',
        'ivebeenthere', 'pickthescore', 'audioslideshow',
    )
    url = article.get('guid', None)
    # Without this guard a missing guid raised TypeError on the substring
    # checks below ("argument of type 'NoneType' is not iterable").
    if url is None:
        return None
    if any(marker in url for marker in skipped_markers):
        return None
    return url
def preprocess_html(self, soup):
    """Strip presentational markup from the parsed page and return it.

    Removes the 'style' and 'face' attributes from every element that
    carries them, then renames every 'ul' and 'li' tag to 'div'. The tree
    is modified in place and the same object is returned.

    :param soup: parsed document exposing ``findAll`` (assumed to be a
        BeautifulSoup tree -- confirm against the caller).
    :return: the same ``soup`` object after modification.
    """
    # Same order as before: all 'style' attributes first, then 'face'.
    for attr_name in ('style', 'face'):
        for node in soup.findAll(**{attr_name: True}):
            del node[attr_name]

    # Turn list markup into plain containers.
    for list_node in soup.findAll(name=['ul', 'li']):
        list_node.name = 'div'

    return soup
'''
def find_sections(self): def find_sections(self):
soup = self.index_to_soup('http://www.guardian.co.uk/theguardian') soup = self.index_to_soup('http://www.guardian.co.uk/theguardian')
# find cover pic # find cover pic
@ -82,20 +117,7 @@ class Guardian(BasicNewsRecipe):
for title, href in self.find_sections(): for title, href in self.find_sections():
feeds.append((title, list(self.find_articles(href)))) feeds.append((title, list(self.find_articles(href))))
return feeds return feeds
'''
def preprocess_html(self, soup):
    """Strip presentational markup from the parsed page and return it.

    Removes the 'style' and 'face' attributes from every element that
    carries them, then renames every 'ul' and 'li' tag to 'div'. The tree
    is modified in place and the same object is returned.

    :param soup: parsed document exposing ``findAll`` (assumed to be a
        BeautifulSoup tree -- confirm against the caller).
    :return: the same ``soup`` object after modification.
    """
    # Same order as before: all 'style' attributes first, then 'face'.
    for attr_name in ('style', 'face'):
        for node in soup.findAll(**{attr_name: True}):
            del node[attr_name]

    # Turn list markup into plain containers.
    for list_node in soup.findAll(name=['ul', 'li']):
        list_node.name = 'div'

    return soup