Update El tribuno de Salta

Fixes #1627882 [Updated recipe for El tribuno de Salta](https://bugs.launchpad.net/calibre/+bug/1627882)
2025-08-11 09:13:57 -04:00 · 2016-09-27 06:30:44 +05:30 · 2016-09-27 06:30:44 +05:30 · a496b95840
commit a496b95840
parent 545c8f84cb
1 changed files with 27 additions and 110 deletions
--- a/recipes/eltribuno_salta_impreso.recipe
+++ b/recipes/eltribuno_salta_impreso.recipe
@ -1,17 +1,13 @@
 __license__   = 'GPL v3'
-__copyright__ = '2013, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2013 - 2016, Darko Miletic <darko.miletic at gmail.com>'
 '''
 http://www.eltribuno.info/salta/edicion_impresa.aspx
 '''
 import urllib
 from calibre.ptempfile import PersistentTemporaryFile
 from calibre.web.feeds.news import BasicNewsRecipe
 from collections import OrderedDict
 class ElTribunoSaltaImpreso(BasicNewsRecipe):
-    title = 'El Tribuno Salta (Edición Impresa)'
+    title                   = 'El Tribuno Salta'
    __author__              = 'Darko Miletic'
    description             = "Diario principal de Salta"
    publisher               = 'Horizontes S.A.'
@ -23,106 +19,27 @@ class ElTribunoSaltaImpreso(BasicNewsRecipe):
    use_embedded_content    = False
    encoding                = 'utf8'
    publication_type        = 'newspaper'
-    delay = 1
+    remove_javascript       = True
-    articles_are_obfuscated = True
+    auto_cleanup            = True
    temp_files = []
    PREFIX = 'http://www.eltribuno.info/salta/'
    INDEX = PREFIX + 'edicion_impresa.aspx'
    PRINTURL = PREFIX + 'nota_print.aspx?%s'
    conversion_options = {
-        'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'linearize_tables': True
+                        'comment'  : description
                        , 'tags'     : category
                        , 'publisher': publisher
                        , 'language' : language
                        }
    keep_only_tags = [
        dict(name='div', attrs={'class': ['notaHead', 'notaContent']})]
    remove_tags = [
        dict(name=['meta', 'iframe', 'base',
                   'object', 'embed', 'link', 'img']),
        dict(name='ul', attrs={'class': 'Tabs'})
    ]
    extra_css = """
                body{font-family: Arial,Helvetica,sans-serif}
                .notaHead h4{text-transform: uppercase; color: gray}
                img{margin-top: 0.8em; display: block}
                """
-    def parse_index(self):
+    feeds = [
-        feeds = OrderedDict()
+              (u'Mas leidas', u'http://www.eltribuno.info/rss/salta/masleidas.xml')
-        soup = None
+              ,(u'El Tribuno', u'http://www.eltribuno.info/rss/salta/home.xml')
-        count = 0
+              ,(u'Salta'     , u'http://www.eltribuno.info/rss/salta/salta.xml')
-        while (count < 5):
+              ,(u'Deportes'  , u'http://www.eltribuno.info/rss/salta/deportes.xml')
-            try:
+            ]
                soup = self.index_to_soup(self.INDEX)
                count = 5
            except:
                print "Retrying download..."
            count += 1
        if not soup:
            return []
        alink = soup.find('a', href=True, attrs={'class': 'ZoomTapa'})
        if alink and 'href' in alink:
            self.cover_url = alink['href']
        sections = soup.findAll(
            'div', attrs={'id': lambda x: x and x.startswith('Ediciones')})
        for section in sections:
            section_title = 'Sin titulo'
            sectiont = section.find('h3', attrs={'class': 'NombreSeccion'})
            if sectiont:
                section_title = self.tag_to_string(sectiont.span)
            arts = section.findAll(
                'div', attrs={'class': 'Noticia NoticiaAB1'})
            for article in arts:
                articles = []
                title = self.tag_to_string(article.div.h3.a)
                url = article.div.h3.a['href']
                description = self.tag_to_string(article.p)
                articles.append({'title': title, 'url': url,
                                 'description': description, 'date': ''})
                if articles:
                    if section_title not in feeds:
                        feeds[section_title] = []
                    feeds[section_title] += articles
        ans = [(key, val) for key, val in feeds.iteritems()]
        return ans
    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('a'):
            if item.string is not None:
                str = item.string
                item.replaceWith(str)
            else:
                str = self.tag_to_string(item)
                item.replaceWith(str)
        return soup
    def get_masthead_title(self):
        return 'El Tribuno'
    def get_obfuscated_article(self, url):
        count = 0
        while (count < 10):
            try:
                response = self.browser.open(url)
                html = response.read()
                count = 10
            except:
                print "Retrying download..."
            count += 1
        tfile = PersistentTemporaryFile('_fa.html')
        tfile.write(html)
        tfile.close()
        self.temp_files.append(tfile)
        return tfile.name
    def print_version(self, url):
        right = url.rpartition('/')[2]
        artid = right.partition('-')[0]
        params = {'Note': artid}
        return (self.PRINTURL % urllib.urlencode(params))