diff --git a/src/calibre/gui2/images/news/times_online.png b/src/calibre/gui2/images/news/times_online.png new file mode 100644 index 0000000000..c9cd67d546 Binary files /dev/null and b/src/calibre/gui2/images/news/times_online.png differ diff --git a/src/calibre/web/feeds/recipes/recipe_newsweek.py b/src/calibre/web/feeds/recipes/recipe_newsweek.py index 863bbb10a4..54e54a9a83 100644 --- a/src/calibre/web/feeds/recipes/recipe_newsweek.py +++ b/src/calibre/web/feeds/recipes/recipe_newsweek.py @@ -2,7 +2,7 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' -import re, time +import re from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe @@ -12,6 +12,7 @@ class Newsweek(BasicNewsRecipe): __author__ = 'Kovid Goyal' description = 'Weekly news and current affairs in the US' no_stylesheets = True + encoding = 'utf-8' language = _('English') remove_tags = [ {'class':['navbar', 'ad', 'sponsorLinksArticle', 'mm-content', @@ -30,12 +31,12 @@ class Newsweek(BasicNewsRecipe): def find_title(self, section): d = {'scope':'Scope', 'thetake':'The Take', 'features':'Features', - None:'Departments'} + None:'Departments', 'culture':'Culture'} ans = None a = section.find('a', attrs={'name':True}) if a is not None: ans = a['name'] - return d[ans] + return d.get(ans, ans) def find_articles(self, section): @@ -64,14 +65,6 @@ class Newsweek(BasicNewsRecipe): soup = self.get_current_issue() if not soup: raise RuntimeError('Unable to connect to newsweek.com. Try again later.') - img = soup.find(alt='Cover') - if img is not None and img.has_key('src'): - small = img['src'] - match = re.search(r'(\d+)_', small.rpartition('/')[-1]) - if match is not None: - self.timefmt = strftime(' [%d %b, %Y]', time.strptime(match.group(1), '%y%m%d')) - self.cover_url = small.replace('coversmall', 'coverlarge') - sections = soup.findAll('div', attrs={'class':'featurewell'}) titles = map(self.find_title, sections) articles = map(self.find_articles, sections) @@ -114,3 +107,12 @@ class Newsweek(BasicNewsRecipe): href = a['href'].split('#')[0] return self.index_to_soup(href) + def get_cover_url(self): + cover_url = None + soup = self.index_to_soup(self.INDEX) + link_item = soup.find('div',attrs={'class':'cover-image'}) + if link_item and link_item.a and link_item.a.img: + cover_url = link_item.a.img['src'] + return cover_url + + diff --git a/src/calibre/web/feeds/recipes/recipe_times_online.py b/src/calibre/web/feeds/recipes/recipe_times_online.py index 45e9a1e10e..f652ddc62c 100644 --- a/src/calibre/web/feeds/recipes/recipe_times_online.py +++ b/src/calibre/web/feeds/recipes/recipe_times_online.py @@ -1,42 +1,65 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' -''' -timesonline.co.uk -''' - -from calibre.web.feeds.news import BasicNewsRecipe - -class TimesOnline(BasicNewsRecipe): - title = u'The Times Online' - __author__ = 'Darko Miletic' - description = 'UK news' - oldest_article = 7 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - language = _('English') - simultaneous_downloads = 1 - - remove_tags_after = dict(name='div', attrs={'class':'bg-666'}) - remove_tags = [ - dict(name='div' , attrs={'class':'hide-from-print padding-bottom-7' }) - ] - - feeds = [ - (u'Top stories from Times Online', u'http://www.timesonline.co.uk/tol/feeds/rss/topstories.xml' ), - ('Latest Business News', 'http://www.timesonline.co.uk/tol/feeds/rss/business.xml'), - ('Economics', 'http://www.timesonline.co.uk/tol/feeds/rss/economics.xml'), - ('World News', 'http://www.timesonline.co.uk/tol/feeds/rss/worldnews.xml'), - ('UK News', 'http://www.timesonline.co.uk/tol/feeds/rss/uknews.xml'), - ('Travel News', 'http://www.timesonline.co.uk/tol/feeds/rss/travel.xml'), - ('Sports News', 'http://www.timesonline.co.uk/tol/feeds/rss/sport.xml'), - ('Film News', 'http://www.timesonline.co.uk/tol/feeds/rss/film.xml'), - ('Tech news', 'http://www.timesonline.co.uk/tol/feeds/rss/tech.xml'), - ('Literary Supplement', 'http://www.timesonline.co.uk/tol/feeds/rss/thetls.xml'), - ] - - def print_version(self, url): - main = url.partition('#')[0] - return main + '?print=yes' \ No newline at end of file +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2008-2009, Darko Miletic ' +''' +timesonline.co.uk +''' + +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag + +class Timesonline(BasicNewsRecipe): + title = 'The Times Online' + __author__ = 'Darko Miletic' + description = 'UK news' + publisher = 'timesonline.co.uk' + category = 'news, politics, UK' + oldest_article = 2 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + simultaneous_downloads = 1 + encoding = 'cp1252' + lang = 'en-UK' + language = _('English') + + html2lrf_options = [ + '--comment', description + , '--category', category + , '--publisher', publisher + ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + + remove_tags = [dict(name=['embed','object'])] + remove_tags_after = dict(name='div', attrs={'class':'bg-666'}) + + feeds = [ + (u'Top stories from Times Online', u'http://www.timesonline.co.uk/tol/feeds/rss/topstories.xml' ), + ('Latest Business News', 'http://www.timesonline.co.uk/tol/feeds/rss/business.xml'), + ('Economics', 'http://www.timesonline.co.uk/tol/feeds/rss/economics.xml'), + ('World News', 'http://www.timesonline.co.uk/tol/feeds/rss/worldnews.xml'), + ('UK News', 'http://www.timesonline.co.uk/tol/feeds/rss/uknews.xml'), + ('Travel News', 'http://www.timesonline.co.uk/tol/feeds/rss/travel.xml'), + ('Sports News', 'http://www.timesonline.co.uk/tol/feeds/rss/sport.xml'), + ('Film News', 'http://www.timesonline.co.uk/tol/feeds/rss/film.xml'), + ('Tech news', 'http://www.timesonline.co.uk/tol/feeds/rss/tech.xml'), + ('Literary Supplement', 'http://www.timesonline.co.uk/tol/feeds/rss/thetls.xml'), + ] + + def print_version(self, url): + return url + '?print=yes' + + def get_article_url(self, article): + return article.get('guid', None) + + def preprocess_html(self, soup): + soup.html['xml:lang'] = self.lang + soup.html['lang'] = self.lang + mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) + mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")]) + soup.head.insert(0,mlang) + soup.head.insert(1,mcharset) + return self.adeify_images(soup) + \ No newline at end of file