diff --git a/resources/recipes/20_minutos.recipe b/resources/recipes/20_minutos.recipe index 8205c918f5..1f862847dc 100644 --- a/resources/recipes/20_minutos.recipe +++ b/resources/recipes/20_minutos.recipe @@ -1,17 +1,67 @@ +# -*- coding: utf-8 +__license__ = 'GPL v3' +__author__ = 'Luis Hernandez' +__copyright__ = 'Luis Hernandez' +description = 'Periódico gratuito en español - v0.5 - 25 Jan 2011' + +''' +www.20minutos.es +''' + from calibre.web.feeds.news import BasicNewsRecipe -class AdvancedUserRecipe1295310874(BasicNewsRecipe): - title = u'20 Minutos (Boletin)' - __author__ = 'Luis Hernandez' - description = 'Periódico gratuito en español' +class AdvancedUserRecipe1294946868(BasicNewsRecipe): + + title = u'20 Minutos' + publisher = u'Grupo 20 Minutos' + + __author__ = u'Luis Hernández' + description = u'Periódico gratuito en español' cover_url = 'http://estaticos.20minutos.es/mmedia/especiales/corporativo/css/img/logotipos_grupo20minutos.gif' - language = 'es' - oldest_article = 2 - max_articles_per_feed = 50 + oldest_article = 5 + max_articles_per_feed = 100 + + remove_javascript = True + no_stylesheets = True + use_embedded_content = False + + encoding = 'ISO-8859-1' + language = 'es' + timefmt = '[%a, %d %b, %Y]' + + keep_only_tags = [dict(name='div', attrs={'id':['content']}) + ,dict(name='div', attrs={'class':['boxed','description','lead','article-content']}) + ,dict(name='span', attrs={'class':['photo-bar']}) + ,dict(name='ul', attrs={'class':['article-author']}) + ] + + remove_tags_before = dict(name='ul' , attrs={'class':['servicios-sub']}) + remove_tags_after = dict(name='div' , attrs={'class':['related-news','col']}) + + remove_tags = [ + dict(name='ol', attrs={'class':['navigation',]}) + ,dict(name='span', attrs={'class':['action']}) + ,dict(name='div', attrs={'class':['twitter comments-list hidden','related-news','col']}) + ,dict(name='div', attrs={'id':['twitter-destacados']}) + ,dict(name='ul', attrs={'class':['article-user-actions','stripped-list']}) + ] + + feeds = [ + (u'Portada' , u'http://www.20minutos.es/rss/') + ,(u'Nacional' , u'http://www.20minutos.es/rss/nacional/') + ,(u'Internacional' , u'http://www.20minutos.es/rss/internacional/') + ,(u'Economia' , u'http://www.20minutos.es/rss/economia/') + ,(u'Deportes' , u'http://www.20minutos.es/rss/deportes/') + ,(u'Tecnologia' , u'http://www.20minutos.es/rss/tecnologia/') + ,(u'Gente - TV' , u'http://www.20minutos.es/rss/gente-television/') + ,(u'Motor' , u'http://www.20minutos.es/rss/motor/') + ,(u'Salud' , u'http://www.20minutos.es/rss/belleza-y-salud/') + ,(u'Viajes' , u'http://www.20minutos.es/rss/viajes/') + ,(u'Vivienda' , u'http://www.20minutos.es/rss/vivienda/') + ,(u'Empleo' , u'http://www.20minutos.es/rss/empleo/') + ,(u'Cine' , u'http://www.20minutos.es/rss/cine/') + ,(u'Musica' , u'http://www.20minutos.es/rss/musica/') + ,(u'Comunidad20' , u'http://www.20minutos.es/rss/zona20/') + ] - feeds = [(u'VESPERTINO', u'http://20minutos.feedsportal.com/c/32489/f/478284/index.rss') - , (u'DEPORTES', u'http://20minutos.feedsportal.com/c/32489/f/478286/index.rss') - , (u'CULTURA', u'http://www.20minutos.es/rss/ocio/') - , (u'TV', u'http://20minutos.feedsportal.com/c/32489/f/490877/index.rss') -] diff --git a/resources/recipes/nytimes_sub.recipe b/resources/recipes/nytimes_sub.recipe index 863e4b22ba..81b8bd5cb7 100644 --- a/resources/recipes/nytimes_sub.recipe +++ b/resources/recipes/nytimes_sub.recipe @@ -498,7 +498,7 @@ class NYTimes(BasicNewsRecipe): for lidiv in div.findAll('li'): if not skipping: self.handle_article(lidiv) - + self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)] return self.filter_ans(self.ans) @@ -609,7 +609,7 @@ class NYTimes(BasicNewsRecipe): if article_date < self.earliest_date: self.log("Skipping article dated %s" % date_str) return None - + #all articles are from today, no need to print the date on every page try: if not self.webEdition: @@ -631,7 +631,7 @@ class NYTimes(BasicNewsRecipe): refstart = reflinkstring.find("javascript:pop_me_up2('") + len("javascript:pop_me_up2('") refend = reflinkstring.find(".html", refstart) + len(".html") reflinkstring = reflinkstring[refstart:refend] - + popuppage = self.browser.open(reflinkstring) popuphtml = popuppage.read() popuppage.close() @@ -640,7 +640,7 @@ class NYTimes(BasicNewsRecipe): year = str(st.tm_year) month = "%.2d" % st.tm_mon day = "%.2d" % st.tm_mday - imgstartpos = popuphtml.find('http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/') + len('http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/') + imgstartpos = popuphtml.find('http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/') + len('http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/') highResImageLink = 'http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/' + popuphtml[imgstartpos:popuphtml.find('.jpg',imgstartpos)+4] popupSoup = BeautifulSoup(popuphtml) highResTag = popupSoup.find('img', {'src':highResImageLink}) @@ -659,9 +659,9 @@ class NYTimes(BasicNewsRecipe): imageTag['height'] = newHeight except: self.log("Error setting the src width and height parameters") - except Exception as e: + except Exception: self.log("Error pulling high resolution images") - + try: #remove "Related content" bar runAroundsFound = soup.findAll('div',{'class':['articleInline runaroundLeft','articleInline doubleRule runaroundLeft','articleInline runaroundLeft firstArticleInline']}) @@ -674,8 +674,8 @@ class NYTimes(BasicNewsRecipe): hline.extract() except: self.log("Error removing related content bar") - - + + try: #in case pulling images failed, delete the enlarge this text enlargeThisList = soup.findAll('div',{'class':'icon enlargeThis'}) diff --git a/src/calibre/gui2/actions/annotate.py b/src/calibre/gui2/actions/annotate.py index 8714654d4b..a702ba045e 100644 --- a/src/calibre/gui2/actions/annotate.py +++ b/src/calibre/gui2/actions/annotate.py @@ -9,7 +9,7 @@ import os, datetime from PyQt4.Qt import pyqtSignal, QModelIndex, QThread, Qt -from calibre.gui2 import error_dialog, gprefs +from calibre.gui2 import error_dialog from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString from calibre import strftime from calibre.gui2.actions import InterfaceAction