...

2025-07-09 03:04:10 -04:00 · 2011-01-25 09:38:38 -07:00 · 2011-01-25 09:38:38 -07:00 · 336874d87f
commit 336874d87f
parent 6ca1aa93e4
3 changed files with 71 additions and 21 deletions
--- a/resources/recipes/20_minutos.recipe
+++ b/resources/recipes/20_minutos.recipe
@@ -1,17 +1,67 @@
+# -*- coding: utf-8
+__license__   = 'GPL v3'
+__author__    = 'Luis Hernandez'
+__copyright__ = 'Luis Hernandez<tolyluis@gmail.com>'
+description   = 'Periódico gratuito en español - v0.5 - 25 Jan 2011'
+
+'''
+www.20minutos.es
+'''
+
 from calibre.web.feeds.news import BasicNewsRecipe

-class AdvancedUserRecipe1295310874(BasicNewsRecipe):
-    title          = u'20 Minutos (Boletin)'
-    __author__            = 'Luis Hernandez'
-    description           = 'Periódico gratuito en español'
+class AdvancedUserRecipe1294946868(BasicNewsRecipe):
+
+    title          = u'20 Minutos'
+    publisher      = u'Grupo 20 Minutos'
+
+    __author__            = u'Luis Hernández'
+    description           = u'Periódico gratuito en español'
    cover_url     = 'http://estaticos.20minutos.es/mmedia/especiales/corporativo/css/img/logotipos_grupo20minutos.gif'
-    language      = 'es'

-    oldest_article = 2
-    max_articles_per_feed = 50
+    oldest_article = 5
+    max_articles_per_feed = 100
+
+    remove_javascript = True
+    no_stylesheets        = True
+    use_embedded_content  = False
+
+    encoding              = 'ISO-8859-1'
+    language              = 'es'
+    timefmt        = '[%a, %d %b, %Y]'
+
+    keep_only_tags     = [dict(name='div', attrs={'id':['content']})
+                                  ,dict(name='div', attrs={'class':['boxed','description','lead','article-content']})
+                                  ,dict(name='span', attrs={'class':['photo-bar']})
+                                  ,dict(name='ul', attrs={'class':['article-author']})
+                                ]
+
+    remove_tags_before = dict(name='ul' , attrs={'class':['servicios-sub']})
+    remove_tags_after  = dict(name='div' , attrs={'class':['related-news','col']})
+
+    remove_tags = [
+                     dict(name='ol', attrs={'class':['navigation',]})
+                    ,dict(name='span', attrs={'class':['action']})
+                    ,dict(name='div', attrs={'class':['twitter comments-list hidden','related-news','col']})
+                    ,dict(name='div', attrs={'id':['twitter-destacados']})
+                    ,dict(name='ul', attrs={'class':['article-user-actions','stripped-list']})
+                                          ]
+
+    feeds = [
+              (u'Portada'              , u'http://www.20minutos.es/rss/')
+             ,(u'Nacional'             , u'http://www.20minutos.es/rss/nacional/')
+             ,(u'Internacional'       , u'http://www.20minutos.es/rss/internacional/')
+             ,(u'Economia'           , u'http://www.20minutos.es/rss/economia/')
+             ,(u'Deportes'            , u'http://www.20minutos.es/rss/deportes/')
+             ,(u'Tecnologia'          , u'http://www.20minutos.es/rss/tecnologia/')
+             ,(u'Gente - TV'         , u'http://www.20minutos.es/rss/gente-television/')
+             ,(u'Motor'                 , u'http://www.20minutos.es/rss/motor/')
+             ,(u'Salud'                 , u'http://www.20minutos.es/rss/belleza-y-salud/')
+             ,(u'Viajes'                , u'http://www.20minutos.es/rss/viajes/')
+             ,(u'Vivienda'             , u'http://www.20minutos.es/rss/vivienda/')
+             ,(u'Empleo'              , u'http://www.20minutos.es/rss/empleo/')
+             ,(u'Cine'                  , u'http://www.20minutos.es/rss/cine/')
+             ,(u'Musica'               , u'http://www.20minutos.es/rss/musica/')
+             ,(u'Comunidad20'     , u'http://www.20minutos.es/rss/zona20/')
+            ]

-    feeds          = [(u'VESPERTINO', u'http://20minutos.feedsportal.com/c/32489/f/478284/index.rss')
-                        , (u'DEPORTES', u'http://20minutos.feedsportal.com/c/32489/f/478286/index.rss')
-                        , (u'CULTURA', u'http://www.20minutos.es/rss/ocio/')
-                        , (u'TV', u'http://20minutos.feedsportal.com/c/32489/f/490877/index.rss')
-]
--- a/resources/recipes/nytimes_sub.recipe
+++ b/resources/recipes/nytimes_sub.recipe
@@ -498,7 +498,7 @@ class NYTimes(BasicNewsRecipe):
                for lidiv in div.findAll('li'):
                    if not skipping:
                        self.handle_article(lidiv)
-            
+
        self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
        return self.filter_ans(self.ans)

@@ -609,7 +609,7 @@ class NYTimes(BasicNewsRecipe):
                if article_date < self.earliest_date:
                    self.log("Skipping article dated %s" % date_str)
                    return None
-                    
+
        #all articles are from today, no need to print the date on every page
        try:
            if not self.webEdition:
@@ -631,7 +631,7 @@ class NYTimes(BasicNewsRecipe):
                            refstart = reflinkstring.find("javascript:pop_me_up2('") + len("javascript:pop_me_up2('")
                            refend = reflinkstring.find(".html", refstart) + len(".html")
                            reflinkstring = reflinkstring[refstart:refend]
-                            
+
                            popuppage = self.browser.open(reflinkstring)
                            popuphtml = popuppage.read()
                            popuppage.close()
@@ -640,7 +640,7 @@ class NYTimes(BasicNewsRecipe):
                                year = str(st.tm_year)
                                month = "%.2d" % st.tm_mon
                                day = "%.2d" % st.tm_mday
-                                imgstartpos = popuphtml.find('http://graphics8.nytimes.com/images/' + year + '/' +  month +'/' + day +'/') + len('http://graphics8.nytimes.com/images/' + year + '/' +  month +'/' + day +'/')                                
+                                imgstartpos = popuphtml.find('http://graphics8.nytimes.com/images/' + year + '/' +  month +'/' + day +'/') + len('http://graphics8.nytimes.com/images/' + year + '/' +  month +'/' + day +'/')
                                highResImageLink = 'http://graphics8.nytimes.com/images/' + year + '/' +  month +'/' + day +'/' + popuphtml[imgstartpos:popuphtml.find('.jpg',imgstartpos)+4]
                                popupSoup = BeautifulSoup(popuphtml)
                                highResTag = popupSoup.find('img', {'src':highResImageLink})
@@ -659,9 +659,9 @@ class NYTimes(BasicNewsRecipe):
                                            imageTag['height'] = newHeight
                                        except:
                                            self.log("Error setting the src width and height parameters")
-            except Exception as e:
+            except Exception:
                self.log("Error pulling high resolution images")
-                
+
            try:
                #remove "Related content" bar
                runAroundsFound = soup.findAll('div',{'class':['articleInline runaroundLeft','articleInline doubleRule runaroundLeft','articleInline runaroundLeft firstArticleInline']})
@@ -674,8 +674,8 @@ class NYTimes(BasicNewsRecipe):
                                hline.extract()
            except:
                self.log("Error removing related content bar")
-     
-                
+
+
            try:
                #in case pulling images failed, delete the enlarge this text
                enlargeThisList = soup.findAll('div',{'class':'icon enlargeThis'})
--- a/src/calibre/gui2/actions/annotate.py
+++ b/src/calibre/gui2/actions/annotate.py
@@ -9,7 +9,7 @@ import os, datetime

 from PyQt4.Qt import pyqtSignal, QModelIndex, QThread, Qt

-from calibre.gui2 import error_dialog, gprefs
+from calibre.gui2 import error_dialog
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString
 from calibre import strftime
 from calibre.gui2.actions import InterfaceAction