Update El tribuno de Salta

Fixes #1627882 [Updated recipe for El tribuno de Salta](https://bugs.launchpad.net/calibre/+bug/1627882)
Kovid Goyal 2016-09-27 06:30:44 +05:30
parent 545c8f84cb
commit a496b95840

@@ -1,128 +1,45 @@
-__license__ = 'GPL v3'
-__copyright__ = '2013, Darko Miletic <darko.miletic at gmail.com>'
+__license__ = 'GPL v3'
+__copyright__ = '2013 - 2016, Darko Miletic <darko.miletic at gmail.com>'
 '''
 http://www.eltribuno.info/salta/edicion_impresa.aspx
 '''
-import urllib
-from calibre.ptempfile import PersistentTemporaryFile
 from calibre.web.feeds.news import BasicNewsRecipe
-from collections import OrderedDict
 class ElTribunoSaltaImpreso(BasicNewsRecipe):
-    title = 'El Tribuno Salta (Edición Impresa)'
-    __author__ = 'Darko Miletic'
-    description = "Diario principal de Salta"
-    publisher = 'Horizontes S.A.'
-    category = 'news, politics, Salta, Argentina, World'
-    oldest_article = 2
-    language = 'es_AR'
-    max_articles_per_feed = 250
-    no_stylesheets = True
-    use_embedded_content = False
-    encoding = 'utf8'
-    publication_type = 'newspaper'
-    delay = 1
-    articles_are_obfuscated = True
-    temp_files = []
-    PREFIX = 'http://www.eltribuno.info/salta/'
-    INDEX = PREFIX + 'edicion_impresa.aspx'
-    PRINTURL = PREFIX + 'nota_print.aspx?%s'
+    title = 'El Tribuno Salta'
+    __author__ = 'Darko Miletic'
+    description = "Diario principal de Salta"
+    publisher = 'Horizontes S.A.'
+    category = 'news, politics, Salta, Argentina, World'
+    oldest_article = 2
+    language = 'es_AR'
+    max_articles_per_feed = 250
+    no_stylesheets = True
+    use_embedded_content = False
+    encoding = 'utf8'
+    publication_type = 'newspaper'
+    remove_javascript = True
+    auto_cleanup = True
     conversion_options = {
-        'comment' : description
-        , 'tags' : category
-        , 'publisher': publisher
-        , 'language' : language
+        'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'linearize_tables': True
     }
-    extra_css = """
-        body{font-family: Arial,Helvetica,sans-serif}
-        .notaHead h4{text-transform: uppercase; color: gray}
-        img{margin-top: 0.8em; display: block}
-    """
+    keep_only_tags = [
+        dict(name='div', attrs={'class': ['notaHead', 'notaContent']})]
+    remove_tags = [
+        dict(name=['meta', 'iframe', 'base',
+                   'object', 'embed', 'link', 'img']),
+        dict(name='ul', attrs={'class': 'Tabs'})
+    ]
-    def parse_index(self):
-        feeds = OrderedDict()
-        soup = None
-        count = 0
-        while (count < 5):
-            try:
-                soup = self.index_to_soup(self.INDEX)
-                count = 5
-            except:
-                print "Retrying download..."
-                count += 1
-        if not soup:
-            return []
-        alink = soup.find('a', href=True, attrs={'class': 'ZoomTapa'})
-        if alink and 'href' in alink:
-            self.cover_url = alink['href']
-        sections = soup.findAll(
-            'div', attrs={'id': lambda x: x and x.startswith('Ediciones')})
-        for section in sections:
-            section_title = 'Sin titulo'
-            sectiont = section.find('h3', attrs={'class': 'NombreSeccion'})
-            if sectiont:
-                section_title = self.tag_to_string(sectiont.span)
-            arts = section.findAll(
-                'div', attrs={'class': 'Noticia NoticiaAB1'})
-            for article in arts:
-                articles = []
-                title = self.tag_to_string(article.div.h3.a)
-                url = article.div.h3.a['href']
-                description = self.tag_to_string(article.p)
-                articles.append({'title': title, 'url': url,
-                                 'description': description, 'date': ''})
-                if articles:
-                    if section_title not in feeds:
-                        feeds[section_title] = []
-                    feeds[section_title] += articles
-        ans = [(key, val) for key, val in feeds.iteritems()]
-        return ans
-    def preprocess_html(self, soup):
-        for item in soup.findAll(style=True):
-            del item['style']
-        for item in soup.findAll('a'):
-            if item.string is not None:
-                str = item.string
-                item.replaceWith(str)
-            else:
-                str = self.tag_to_string(item)
-                item.replaceWith(str)
-        return soup
+    feeds = [
+        (u'Mas leidas', u'http://www.eltribuno.info/rss/salta/masleidas.xml')
+        ,(u'El Tribuno', u'http://www.eltribuno.info/rss/salta/home.xml')
+        ,(u'Salta'     , u'http://www.eltribuno.info/rss/salta/salta.xml')
+        ,(u'Deportes'  , u'http://www.eltribuno.info/rss/salta/deportes.xml')
+    ]
+    def get_masthead_title(self):
+        return 'El Tribuno'
-    def get_obfuscated_article(self, url):
-        count = 0
-        while (count < 10):
-            try:
-                response = self.browser.open(url)
-                html = response.read()
-                count = 10
-            except:
-                print "Retrying download..."
-                count += 1
-        tfile = PersistentTemporaryFile('_fa.html')
-        tfile.write(html)
-        tfile.close()
-        self.temp_files.append(tfile)
-        return tfile.name
-    def print_version(self, url):
-        right = url.rpartition('/')[2]
-        artid = right.partition('-')[0]
-        params = {'Note': artid}
-        return (self.PRINTURL % urllib.urlencode(params))
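
A quick way to exercise a recipe change like this one is calibre's built-in recipe test mode. The command below is only a sketch of that workflow; the file names eltribunosalta.recipe and test.epub are illustrative, not taken from the commit:

# Fetch only a couple of articles per feed and build a throwaway EPUB for inspection
ebook-convert eltribunosalta.recipe test.epub --test -vv

Here --test restricts the run to a couple of feeds with a couple of articles each, and -vv prints verbose logs, which makes it easy to confirm that the RSS feeds and the auto_cleanup output look reasonable before committing.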