diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py index 9d892b9d40..644e0d2bb1 100644 --- a/src/calibre/web/feeds/recipes/__init__.py +++ b/src/calibre/web/feeds/recipes/__init__.py @@ -20,7 +20,7 @@ recipe_modules = ['recipe_' + r for r in ( 'science_news', 'the_nation', 'lrb', 'harpers_full', 'liberation', 'linux_magazine', 'telegraph_uk', 'utne', 'sciencedaily', 'forbes', 'time_magazine', 'endgadget', 'fudzilla', 'nspm_int', 'nspm', 'pescanik', - 'spiegel_int', 'themarketticker', 'tomshardware', + 'spiegel_int', 'themarketticker', 'tomshardware', 'xkcd', )] import re, imp, inspect, time, os diff --git a/src/calibre/web/feeds/recipes/recipe_time_magazine.py b/src/calibre/web/feeds/recipes/recipe_time_magazine.py index 3d28bc39ce..026aa233db 100644 --- a/src/calibre/web/feeds/recipes/recipe_time_magazine.py +++ b/src/calibre/web/feeds/recipes/recipe_time_magazine.py @@ -6,22 +6,28 @@ __copyright__ = '2008, Darko Miletic ' time.com ''' -from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.web.feeds.news import BasicNewsRecipe class Time(BasicNewsRecipe): title = u'Time' - __author__ = 'Darko Miletic' + __author__ = 'Kovid Goyal' description = 'Weekly magazine' oldest_article = 7 max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False - #cover_url = 'http://img.timeinc.net/time/rd/trunk/www/web/feds/i/logo_time_home.gif' - keep_only_tags = [dict(name='div', attrs={'class':'tout1'})] - remove_tags = [dict(name='ul', attrs={'class':['button', 'find']})] + remove_tags_after = [dict(id='connectStory')] + remove_tags = [ + dict(name='ul', attrs={'class':['button', 'find']}), + dict(name='div', attrs={'class':['nav', 'header', 'sectheader', + 'searchWrap', 'subNav', + 'artTools', 'connect', + 'similarrecs']}), + dict(name='div', id=['articleSideBar', 'connectStory']), + dict(name='dl', id=['links']), + ] feeds = [ (u'Top Stories', u'http://feedproxy.google.com/time/topstories') @@ -34,17 +40,20 @@ class Time(BasicNewsRecipe): ,(u'Travel', u'http://feedproxy.google.com/time/travel') ] + def get_article_url(self, article): + return article.get('guid', article['link']) + def get_cover_url(self): soup = self.index_to_soup('http://www.time.com/time/') img = soup.find('img', alt='Current Time.com Cover', width='107') if img is not None: return img.get('src', None) - def print_version(self, url): - raw = self.browser.open(url).read() - soup = BeautifulSoup(raw.decode('utf8', 'replace')) - print_link = soup.find('a', {'id':'prt'}) - if print_link is None: - return '' - return 'http://www.time.com' + print_link['href'] + try: + soup = self.index_to_soup(url) + print_link = soup.find('a', {'id':'prt'}) + return 'http://www.time.com' + print_link['href'] + except: + self.log_exception('Failed to find print version for '+url) + return '' diff --git a/src/calibre/web/feeds/recipes/recipe_xkcd.py b/src/calibre/web/feeds/recipes/recipe_xkcd.py new file mode 100644 index 0000000000..f76cf5614e --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_xkcd.py @@ -0,0 +1,36 @@ +__license__ = 'GPL v3' +__copyright__ = '2008, Kovid Goyal ' + +''' +Fetch xkcd. +''' + +import time +from calibre.web.feeds.news import BasicNewsRecipe + +class XkcdCom(BasicNewsRecipe): + title = 'xkcd' + description = 'A webcomic of romance and math humor.' + __author__ = 'Martin Pitt' + use_embedded_content = False + oldest_article = 60 + keep_only_tags = [dict(id='middleContent')] + remove_tags = [dict(name='ul'), dict(name='h3'), dict(name='br')] + no_stylesheets = True + + def parse_index(self): + INDEX = 'http://xkcd.com/archive/' + + soup = self.index_to_soup(INDEX) + articles = [] + for item in soup.findAll('a', title=True): + articles.append({ + 'date': item['title'], + 'timestamp': time.mktime(time.strptime(item['title'], '%Y-%m-%d'))+1, + 'url': 'http://xkcd.com' + item['href'], + 'title': self.tag_to_string(item).encode('UTF-8'), + 'description': '', + 'content': '', + }) + + return [('xkcd', articles)]