Fix #4876 (Updated recipes)

2025-08-30 23:00:21 -04:00 · 2010-02-12 09:51:58 -07:00 · 2010-02-12 09:51:58 -07:00 · a3052cf127
commit a3052cf127
parent ba2b5056b0
2 changed files with 55 additions and 49 deletions
--- a/resources/recipes/pagina12.recipe
+++ b/resources/recipes/pagina12.recipe
@@ -15,14 +15,14 @@ class Pagina12(BasicNewsRecipe):
    publisher             = 'La Pagina S.A.'
    category              = 'news, politics, Argentina'
    oldest_article        = 2
-    max_articles_per_feed = 100
+    max_articles_per_feed = 200
    no_stylesheets        = True
    encoding              = 'cp1252'
    use_embedded_content  = False
    language              = 'es'
    remove_empty_feeds    = True
    masthead_url          = 'http://www.pagina12.com.ar/commons/imgs/logo-home.gif'
-    extra_css             = ' body{font-family: Arial,Helvetica,sans-serif } h2{color: #028CCD} img{margin-bottom: 0.4em} .epigrafe{font-size: x-small; background-color: #EBEAE5; color: #565144 } .intro{font-size: 1.1em} '
+    extra_css             = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} #autor{font-weight: bold} #fecha,#epigrafe{font-size: 0.9em; margin: 5px} #imagen{border: 1px solid black; margin: 0 0 1.25em 1.25em; width: 232px }  '
    conversion_options = {
                          'comment'   : description
@@ -45,7 +45,9 @@ class Pagina12(BasicNewsRecipe):
             ,(u'NO'             , u'http://www.pagina12.com.ar/diario/rss/no.xml'          )
             ,(u'Las/12'         , u'http://www.pagina12.com.ar/diario/rss/las12.xml'       )
             ,(u'Soy'            , u'http://www.pagina12.com.ar/diario/rss/soy.xml'         )
-             ,(u'M2'             , u'http://www.pagina12.com.ar/diario/rss/futuro.xml'      )
+             ,(u'Futuro'         , u'http://www.pagina12.com.ar/diario/rss/futuro.xml'      )
             ,(u'M2'             , u'http://www.pagina12.com.ar/diario/rss/m2.xml'          )
             ,(u'Rosario/12'     , u'http://www.pagina12.com.ar/diario/rss/rosario.xml'     )
            ]
    def print_version(self, url):
@@ -60,3 +62,7 @@ class Pagina12(BasicNewsRecipe):
              return image['src']
        return None
    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup
--- a/resources/recipes/variety.recipe
+++ b/resources/recipes/variety.recipe
@@ -1,46 +1,46 @@
-#!/usr/bin/env  python
+__license__   = 'GPL v3'
-
+__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
-__license__   = 'GPL v3'
+'''
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+www.variety.com
-'''
+'''
-www.variety.com
+
-'''
+from calibre.web.feeds.recipes import BasicNewsRecipe
-
+
-from calibre.web.feeds.recipes import BasicNewsRecipe
+class Variety(BasicNewsRecipe):
-
+    title                  = 'Variety'
-class Variety(BasicNewsRecipe):
+    __author__             = 'Darko Miletic'
-    title                  = 'Variety'
+    description            = 'Breaking entertainment movie news, movie reviews, entertainment industry events, news and reviews from Cannes, Oscars, and Hollywood awards.  Featuring box office charts, archives and more.'
-    __author__             = 'Darko Miletic'
+    oldest_article         = 2
-    description            = 'Breaking entertainment movie news, movie reviews, entertainment industry events, news and reviews from Cannes, Oscars, and Hollywood awards.  Featuring box office charts, archives and more.'
+    max_articles_per_feed  = 100
-    oldest_article         = 2
+    no_stylesheets         = True
-    max_articles_per_feed  = 100
+    use_embedded_content   = False
-    no_stylesheets         = True
+    encoding               = 'cp1252'
-    use_embedded_content   = False
+    publisher              = 'Red Business Information'
-    encoding               = 'cp1252'
+    category               = 'Entertainment Industry News, Daily Variety, Movie Reviews, TV, Awards, Oscars, Cannes, Box Office, Hollywood'
-    publisher              = 'Red Business Information'
+    language               = 'en'
-    category               = 'Entertainment Industry News, Daily Variety, Movie Reviews, TV, Awards, Oscars, Cannes, Box Office, Hollywood'
+    masthead_url           = 'http://a330.g.akamai.net/7/330/23382/20090528190853/www.variety.com/graphics/variety/Variety_logo_green_tm.gif'
-    language               = 'en'
+    extra_css              = ' body{font-family: Georgia,"Times New Roman",Times,Courier,serif } img{margin-bottom: 1em} '
-
+
-    conversion_options = {  
+    conversion_options = {  
-                             'comments'  : description
+                             'comments'  : description
-                            ,'tags'      : category
+                            ,'tags'      : category
-                            ,'language'  : language
+                            ,'language'  : language
-                            ,'publisher' : publisher
+                            ,'publisher' : publisher
-                         }
+                         }
-
+
-    remove_tags = [dict(name=['object','link','map'])]
+    remove_tags = [dict(name=['object','link','map'])]
-
+
-    keep_only_tags = [dict(name='div', attrs={'id':'article'})]
+    keep_only_tags = [dict(name='div', attrs={'id':'article'})]
-                  
+                  
-    feeds = [(u'News & Articles', u'http://feeds.feedburner.com/variety/headlines' )]
+    feeds = [(u'News & Articles', u'http://feeds.feedburner.com/variety/headlines' )]
-
+
-    def print_version(self, url):
+    def print_version(self, url):
-        rpt = url.rpartition('?')[0]
+        rpt = url.rpartition('?')[0]
-        artid = rpt.rpartition('/')[2]
+        artid = rpt.rpartition('/')[2]
-        catidr = url.rpartition('categoryid=')[2]
+        catidr = url.rpartition('categoryid=')[2]
-        catid = catidr.partition('&')[0]
+        catid = catidr.partition('&')[0]
-        return 'http://www.variety.com/index.asp?layout=print_story&articleid=' + artid + '&categoryid=' + catid
+        return 'http://www.variety.com/index.asp?layout=print_story&articleid=' + artid + '&categoryid=' + catid
-
+
-    def get_article_url(self, article):
+
-        return article.get('feedburner_origlink',  None)
+    def preprocess_html(self, soup):
-
+        return self.adeify_images(soup)