Implement #2564 (Various new recipes)

2025-07-09 03:04:10 -04:00 · 2009-06-08 12:53:58 -07:00 · 2009-06-08 12:53:58 -07:00 · 9266cfb0a5
commit 9266cfb0a5
parent 462124c87b
9 changed files with 295 additions and 1 deletions
--- a/src/calibre/gui2/images/news/elperiodico_catalan.png
+++ b/src/calibre/gui2/images/news/elperiodico_catalan.png
--- a/src/calibre/gui2/images/news/elperiodico_spanish.png
+++ b/src/calibre/gui2/images/news/elperiodico_spanish.png
--- a/src/calibre/gui2/images/news/expansion_spanish.png
+++ b/src/calibre/gui2/images/news/expansion_spanish.png
--- a/src/calibre/web/feeds/recipes/__init__.py
+++ b/src/calibre/web/feeds/recipes/__init__.py
@@ -45,7 +45,9 @@ recipe_modules = ['recipe_' + r for r in (
           'straitstimes', 'index_hu', 'pcworld_hu', 'hrt', 'rts',
           'h1', 'h2', 'h3', 'phd_comics', 'woz_die', 'elektrolese',
           'climate_progress', 'carta', 'slashdot', 'publico',
-           'the_budget_fashionista'
+           'the_budget_fashionista', 'elperiodico_catalan',
+           'elperiodico_spanish', 'expansion_spanish', 'lavanguardia',
+           'marca',
          )]

 import re, imp, inspect, time, os
--- a/src/calibre/web/feeds/recipes/recipe_elperiodico_catalan.py
+++ b/src/calibre/web/feeds/recipes/recipe_elperiodico_catalan.py
@ -0,0 +1,55 @@
+#!/usr/bin/env  python
+# -*- coding: utf-8 -*-
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+'''
+elperiodico.cat
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import Tag
+
+class ElPeriodico_cat(BasicNewsRecipe):
+    '''Recipe settings for elperiodico.cat: a single RSS feed, tag filter
+    lists and two helper methods (print_version, preprocess_html).'''
+
+    title = 'El Periodico de Catalunya'
+    __author__ = 'Darko Miletic'
+    language = _('Catalan')
+    encoding = 'cp1252'
+    publisher = 'elperiodico.cat'
+    description = 'Noticias desde Catalunya'
+    category = 'news, politics, Spain, Catalunya'
+
+    oldest_article = 2
+    max_articles_per_feed = 100
+    delay = 1
+    use_embedded_content = False
+    no_stylesheets = True
+
+    # Both option values are built from description/category/publisher above.
+    html2lrf_options = ['--comment', description,
+                        '--category', category,
+                        '--publisher', publisher]
+    html2epub_options = ''.join((
+        'publisher="', publisher,
+        '"\ncomments="', description,
+        '"\ntags="', category, '"'))
+
+    feeds = [
+        (u"Tota l'edició", u'http://www.elperiodico.cat/rss.asp?id=46'),
+    ]
+
+    keep_only_tags = [{'name': 'div', 'attrs': {'id': 'noticia'}}]
+    remove_tags = [
+        {'name': ['object', 'link', 'script']},
+        {'name': 'ul', 'attrs': {'class': 'herramientasDeNoticia'}},
+        {'name': 'div', 'attrs': {'id': 'inferiores'}},
+    ]
+
+    def print_version(self, url):
+        '''Return url with every '/default.asp?' replaced by '/print.asp?'.'''
+        article_page, print_page = '/default.asp?', '/print.asp?'
+        return url.replace(article_page, print_page)
+
+    def preprocess_html(self, soup):
+        '''Insert a Content-Type meta tag (charset=utf-8) at index 0 of
+        soup.head, delete every style attribute, and return soup.'''
+        meta_attrs = [("http-equiv","Content-Type"),("content","text/html; charset=utf-8")]
+        charset_meta = Tag(soup, 'meta', meta_attrs)
+        soup.head.insert(0, charset_meta)
+        for styled in soup.findAll(style=True):
+            del styled['style']
+        return soup
+
--- a/src/calibre/web/feeds/recipes/recipe_elperiodico_spanish.py
+++ b/src/calibre/web/feeds/recipes/recipe_elperiodico_spanish.py
@ -0,0 +1,55 @@
+#!/usr/bin/env  python
+# -*- coding: utf-8 -*-
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+'''
+elperiodico.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import Tag
+
+class ElPeriodico_esp(BasicNewsRecipe):
+    '''Recipe settings for elperiodico.com: a single RSS feed, tag filter
+    lists and two helper methods (print_version, preprocess_html).'''
+
+    # NOTE(review): this title string is identical to the one used by
+    # ElPeriodico_cat in recipe_elperiodico_catalan.py -- confirm that the
+    # two recipes are meant to share it.
+    title = 'El Periodico de Catalunya'
+    __author__ = 'Darko Miletic'
+    language = _('Spanish')
+    encoding = 'cp1252'
+    publisher = 'elperiodico.com'
+    description = 'Noticias desde Catalunya'
+    category = 'news, politics, Spain, Catalunya'
+
+    oldest_article = 2
+    max_articles_per_feed = 100
+    delay = 1
+    use_embedded_content = False
+    no_stylesheets = True
+
+    # Both option values are built from description/category/publisher above.
+    html2lrf_options = ['--comment', description,
+                        '--category', category,
+                        '--publisher', publisher]
+    html2epub_options = ''.join((
+        'publisher="', publisher,
+        '"\ncomments="', description,
+        '"\ntags="', category, '"'))
+
+    feeds = [
+        (u"Toda la edición", u'http://www.elperiodico.com/rss.asp?id=46'),
+    ]
+
+    keep_only_tags = [{'name': 'div', 'attrs': {'id': 'noticia'}}]
+    remove_tags = [
+        {'name': ['object', 'link', 'script']},
+        {'name': 'ul', 'attrs': {'class': 'herramientasDeNoticia'}},
+        {'name': 'div', 'attrs': {'id': 'inferiores'}},
+    ]
+
+    def print_version(self, url):
+        '''Return url with every '/default.asp?' replaced by '/print.asp?'.'''
+        article_page, print_page = '/default.asp?', '/print.asp?'
+        return url.replace(article_page, print_page)
+
+    def preprocess_html(self, soup):
+        '''Insert a Content-Type meta tag (charset=utf-8) at index 0 of
+        soup.head, delete every style attribute, and return soup.'''
+        meta_attrs = [("http-equiv","Content-Type"),("content","text/html; charset=utf-8")]
+        charset_meta = Tag(soup, 'meta', meta_attrs)
+        soup.head.insert(0, charset_meta)
+        for styled in soup.findAll(style=True):
+            del styled['style']
+        return soup
+
--- a/src/calibre/web/feeds/recipes/recipe_expansion_spanish.py
+++ b/src/calibre/web/feeds/recipes/recipe_expansion_spanish.py
@ -0,0 +1,58 @@
+#!/usr/bin/env  python
+# -*- coding: utf-8 -*-
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.expansion.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import Tag
+
+class Expansion(BasicNewsRecipe):
+    '''Recipe settings for www.expansion.com: RSS feeds, tag filter lists
+    and a preprocess_html method that adjusts the parsed page.'''
+
+    title = 'Diario Expansion'
+    __author__ = 'Darko Miletic'
+    language = _('Spanish')
+    direction = 'ltr'
+    encoding = 'iso-8859-15'
+    publisher = 'expansion.com'
+    description = 'Lider de informacion de mercados, economica y politica'
+    category = 'news, politics, Spain'
+
+    oldest_article = 2
+    max_articles_per_feed = 100
+    delay = 1
+    use_embedded_content = False
+    no_stylesheets = True
+
+    # Both option values are built from description/category/publisher above.
+    html2lrf_options = ['--comment', description,
+                        '--category', category,
+                        '--publisher', publisher]
+    html2epub_options = ''.join((
+        'publisher="', publisher,
+        '"\ncomments="', description,
+        '"\ntags="', category, '"'))
+
+    # NOTE(review): both entries point at the same URL (data2=178) --
+    # confirm whether 'Temas del dia' was meant to use a different feed id.
+    feeds = [
+        (u'Ultimas noticias', u'http://rss.expansion.com/rss/descarga.htm?data2=178'),
+        (u'Temas del dia', u'http://rss.expansion.com/rss/descarga.htm?data2=178'),
+    ]
+
+    keep_only_tags = [{'name': 'div', 'attrs': {'id': 'principal'}}]
+    remove_tags = [
+        {'name': ['object', 'link', 'script']},
+        {'name': 'div', 'attrs': {'class': ['utilidades', 'tit_relacionadas']}},
+    ]
+    # The 'tit_relacionadas' div is listed both here and in remove_tags.
+    remove_tags_after = [{'name': 'div', 'attrs': {'class': 'tit_relacionadas'}}]
+
+    def preprocess_html(self, soup):
+        '''Set the dir attribute of soup.html to self.direction, insert a
+        Content-Type meta tag (charset=utf-8) at index 0 of soup.head,
+        delete every style attribute, and return soup.'''
+        soup.html['dir'] = self.direction
+        meta_attrs = [("http-equiv","Content-Type"),("content","text/html; charset=utf-8")]
+        charset_meta = Tag(soup, 'meta', meta_attrs)
+        soup.head.insert(0, charset_meta)
+        for styled in soup.findAll(style=True):
+            del styled['style']
+        return soup
+
--- a/src/calibre/web/feeds/recipes/recipe_lavanguardia.py
+++ b/src/calibre/web/feeds/recipes/recipe_lavanguardia.py
@ -0,0 +1,69 @@
+#!/usr/bin/env  python
+# -*- coding: utf-8 -*-
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.lavanguardia.es
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import Tag
+
+class LaVanguardia(BasicNewsRecipe):
+    '''Recipe settings for www.lavanguardia.es.
+
+    Declares the FeedBurner section feeds, the tag filter lists and a
+    preprocess_html method that adjusts the parsed page.
+    '''
+    title                 = 'La Vanguardia Digital'
+    __author__            = 'Darko Miletic'
+    description           = u'Noticias desde España'
+    publisher             = 'La Vanguardia'
+    category              = 'news, politics, Spain'
+    oldest_article        = 2
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    use_embedded_content  = False
+    delay                 = 1
+    encoding              = 'cp1252'
+    language              = _('Spanish')
+    direction             = 'ltr'
+
+    # Both option values are built from description/category/publisher above.
+    html2lrf_options = [
+                          '--comment'  , description
+                        , '--category' , category
+                        , '--publisher', publisher
+                        ]
+
+    html2epub_options  = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
+
+    # (section title, feed URL) pairs. The last title was misspelled
+    # 'Sucessos'; it now matches the Spanish word used in its URL slug.
+    feeds              = [
+                            (u'Ciudadanos'           , u'http://feeds.feedburner.com/lavanguardia/ciudadanos'   )
+                           ,(u'Cultura'              , u'http://feeds.feedburner.com/lavanguardia/cultura'      )
+                           ,(u'Deportes'             , u'http://feeds.feedburner.com/lavanguardia/deportes'     )
+                           ,(u'Economia'             , u'http://feeds.feedburner.com/lavanguardia/economia'     )
+                           ,(u'El lector opina'      , u'http://feeds.feedburner.com/lavanguardia/lectoropina'  )
+                           ,(u'Gente y TV'           , u'http://feeds.feedburner.com/lavanguardia/gente'        )
+                           ,(u'Internacional'        , u'http://feeds.feedburner.com/lavanguardia/internacional')
+                           ,(u'Internet y tecnologia', u'http://feeds.feedburner.com/lavanguardia/internet'     )
+                           ,(u'Motor'                , u'http://feeds.feedburner.com/lavanguardia/motor'        )
+                           ,(u'Politica'             , u'http://feeds.feedburner.com/lavanguardia/politica'     )
+                           ,(u'Sucesos'              , u'http://feeds.feedburner.com/lavanguardia/sucesos'      )
+                         ]
+
+
+    keep_only_tags = [
+                       dict(name='div', attrs={'class':'element1_3'})
+                     ]
+
+    remove_tags        = [
+                             dict(name=['object','link','script'])
+                            ,dict(name='div', attrs={'class':['colC','peu']})
+                         ]
+
+    remove_tags_after = [dict(name='div', attrs={'class':'text'})]
+
+    def preprocess_html(self, soup):
+        '''Adjust the parsed page in place and return it.
+
+        Sets the dir attribute of soup.html to self.direction, inserts a
+        Content-Type meta tag (charset=utf-8) at index 0 of soup.head and
+        deletes every style attribute found in the document.
+        '''
+        soup.html['dir' ] = self.direction
+        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
+        soup.head.insert(0,mcharset)
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
+
--- a/src/calibre/web/feeds/recipes/recipe_marca.py
+++ b/src/calibre/web/feeds/recipes/recipe_marca.py
@ -0,0 +1,55 @@
+#!/usr/bin/env  python
+# -*- coding: utf-8 -*-
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.marca.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import Tag
+
+class Marca(BasicNewsRecipe):
+    '''Recipe settings for www.marca.com: one RSS feed, tag filter lists
+    and a preprocess_html method that adjusts the parsed page.'''
+
+    title = 'Marca'
+    __author__ = 'Darko Miletic'
+    language = _('Spanish')
+    direction = 'ltr'
+    encoding = 'iso-8859-15'
+    publisher = 'marca.com'
+    description = 'Noticias deportivas'
+    category = 'news, sports, Spain'
+
+    oldest_article = 2
+    max_articles_per_feed = 100
+    delay = 1
+    use_embedded_content = False
+    no_stylesheets = True
+
+    # Both option values are built from description/category/publisher above.
+    html2lrf_options = ['--comment', description,
+                        '--category', category,
+                        '--publisher', publisher]
+    html2epub_options = ''.join((
+        'publisher="', publisher,
+        '"\ncomments="', description,
+        '"\ntags="', category, '"'))
+
+    feeds = [
+        (u'Portada', u'http://rss.marca.com/rss/descarga.htm?data2=425'),
+    ]
+
+    keep_only_tags = [
+        {'name': 'div', 'attrs': {'class': ['cab_articulo', 'col_izq']}},
+    ]
+    remove_tags = [
+        {'name': ['object', 'link', 'script']},
+        {'name': 'div', 'attrs': {'class': ['colC', 'peu']}},
+        {'name': 'div', 'attrs': {'class': ['utilidades estirar', 'bloque_int_corr estirar']}},
+    ]
+    # The 'bloque_int_corr estirar' div is listed both here and in remove_tags.
+    remove_tags_after = [{'name': 'div', 'attrs': {'class': 'bloque_int_corr estirar'}}]
+
+    def preprocess_html(self, soup):
+        '''Set the dir attribute of soup.html to self.direction, insert a
+        Content-Type meta tag (charset=utf-8) at index 0 of soup.head,
+        delete every style attribute, and return soup.'''
+        soup.html['dir'] = self.direction
+        meta_attrs = [("http-equiv","Content-Type"),("content","text/html; charset=utf-8")]
+        charset_meta = Tag(soup, 'meta', meta_attrs)
+        soup.head.insert(0, charset_meta)
+        for styled in soup.findAll(style=True):
+            del styled['style']
+        return soup
+