From 5f3c10f91df30f2a8d5c2f5d6ca98c572251a4c0 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 23 Jul 2013 16:06:32 +0530
Subject: [PATCH] El Tribuno Salta and Jujuy by Darko Miletic

Fixes #1203724 [New recipes for El Tribuno Salta and Jujuy](https://bugs.launchpad.net/calibre/+bug/1203724)
---
 recipes/eltribuno_jujuy_impreso.recipe    | 127 ++++++++++++++++++++++
 recipes/eltribuno_salta_impreso.recipe    | 127 ++++++++++++++++++++++
 recipes/icons/eltribuno_jujuy_impreso.png | Bin 0 -> 592 bytes
 recipes/icons/eltribuno_salta_impreso.png | Bin 0 -> 592 bytes
 4 files changed, 254 insertions(+)
 create mode 100644 recipes/eltribuno_jujuy_impreso.recipe
 create mode 100644 recipes/eltribuno_salta_impreso.recipe
 create mode 100644 recipes/icons/eltribuno_jujuy_impreso.png
 create mode 100644 recipes/icons/eltribuno_salta_impreso.png

diff --git a/recipes/eltribuno_jujuy_impreso.recipe b/recipes/eltribuno_jujuy_impreso.recipe
new file mode 100644
index 0000000000..2b725231c9
--- /dev/null
+++ b/recipes/eltribuno_jujuy_impreso.recipe
@@ -0,0 +1,127 @@
+__license__   = 'GPL v3'
+__copyright__ = '2013, Darko Miletic <darko.miletic at gmail.com>'
+'''
+http://www.eltribuno.info/jujuy/edicion_impresa.aspx
+'''
+
+import urllib
+from calibre.ptempfile import PersistentTemporaryFile
+from calibre.web.feeds.news import BasicNewsRecipe
+from collections import OrderedDict
+
+class ElTribunoJujuyImpreso(BasicNewsRecipe):
+    '''Recipe for the print-edition index of El Tribuno, Jujuy edition.'''
+
+    title                   = 'El Tribuno Jujuy (Edición Impresa)'
+    __author__              = 'Darko Miletic'
+    description             = "Diario principal de Jujuy"
+    publisher               = 'Horizontes S.A.'
+    category                = 'news, politics, Jujuy, Argentina, World'
+    oldest_article          = 2
+    language                = 'es_AR'
+    max_articles_per_feed   = 250
+    no_stylesheets          = True
+    use_embedded_content    = False
+    encoding                = 'cp1252'
+    publication_type        = 'newspaper'
+    delay                   = 1
+    articles_are_obfuscated = True
+    temp_files              = []  # class-level list, shared by all instances
+    PREFIX                  = 'http://www.eltribuno.info/jujuy/'
+    INDEX                   = PREFIX + 'edicion_impresa.aspx'
+    PRINTURL                = PREFIX + 'nota_print.aspx?%s'
+
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : language
+                        , 'linearize_tables' : True
+                        }
+
+    keep_only_tags = [dict(name='div' , attrs={'class':['notaHead', 'notaContent']})]
+    remove_tags = [
+                     dict(name=['meta','iframe','base','object','embed','link','img']),
+                     dict(name='ul', attrs={'class':'Tabs'})
+                  ]
+
+    extra_css = """
+                body{font-family: Arial,Helvetica,sans-serif}
+                .notaHead h4{text-transform: uppercase; color: gray}
+                img{margin-top: 0.8em; display: block}
+                """
+
+    def parse_index(self):
+        '''Return [(section title, [article dicts])] read from INDEX, or [] on failure.'''
+        soup = None
+        for _attempt in range(5):
+            try:
+                soup = self.index_to_soup(self.INDEX)
+                break
+            except Exception:
+                self.log.warn('Retrying download...')
+        if soup is None:
+            return []
+        # find(href=True) only returns anchors that have an href, so the
+        # attribute can be read directly once a match was found.
+        alink = soup.find('a', href=True, attrs={'class':'ZoomTapa'})
+        if alink is not None:
+            self.cover_url = alink['href']
+        feeds = OrderedDict()
+        sections = soup.findAll('div', attrs={'id':lambda x: x and x.startswith('Ediciones')})
+        for section in sections:
+            section_title = 'Sin titulo'
+            sectiont = section.find('h3', attrs={'class':'NombreSeccion'})
+            if sectiont is not None:
+                section_title = self.tag_to_string(sectiont.span)
+            for article in section.findAll('div', attrs={'class':'Noticia NoticiaAB1'}):
+                try:
+                    link = article.div.h3.a
+                    url = link['href']
+                except (AttributeError, KeyError, TypeError):
+                    continue  # teaser lacks the div > h3 > a headline link; skip it
+                feeds.setdefault(section_title, []).append({
+                    'title': self.tag_to_string(link), 'url': url,
+                    'description': self.tag_to_string(article.p), 'date': ''})
+        return list(feeds.items())
+
+    def preprocess_html(self, soup):
+        '''Drop inline style attributes and replace each <a> tag with its text.'''
+        for item in soup.findAll(style=True):
+            del item['style']
+        for anchor in soup.findAll('a'):
+            text = anchor.string
+            if text is None:
+                text = self.tag_to_string(anchor)
+            anchor.replaceWith(text)
+        return soup
+
+    def get_masthead_title(self):
+        '''Return the fixed masthead text.'''
+        return 'El Tribuno'
+
+    def get_obfuscated_article(self, url):
+        '''Download url (up to 10 attempts) into a temp file; return its path.
+
+        Raises IOError when every attempt fails.
+        '''
+        html = None
+        for _attempt in range(10):
+            try:
+                html = self.browser.open(url).read()
+                break
+            except Exception:
+                self.log.warn('Retrying download...')
+        if html is None:
+            raise IOError('Failed to download ' + url)
+        tfile = PersistentTemporaryFile('_fa.html')
+        tfile.write(html)
+        tfile.close()
+        self.temp_files.append(tfile)
+        return tfile.name
+
+    def print_version(self, url):
+        '''Build the PRINTURL for the id between the last '/' and the next '-'.'''
+        artid = url.rpartition('/')[2].partition('-')[0]
+        return self.PRINTURL % urllib.urlencode({'Note': artid})
+
diff --git a/recipes/eltribuno_salta_impreso.recipe b/recipes/eltribuno_salta_impreso.recipe
new file mode 100644
index 0000000000..67cc073a7e
--- /dev/null
+++ b/recipes/eltribuno_salta_impreso.recipe
@@ -0,0 +1,127 @@
+__license__   = 'GPL v3'
+__copyright__ = '2013, Darko Miletic <darko.miletic at gmail.com>'
+'''
+http://www.eltribuno.info/salta/edicion_impresa.aspx
+'''
+
+import urllib
+from calibre.ptempfile import PersistentTemporaryFile
+from calibre.web.feeds.news import BasicNewsRecipe
+from collections import OrderedDict
+
+class ElTribunoSaltaImpreso(BasicNewsRecipe):
+    '''Recipe for the print-edition index of El Tribuno, Salta edition.'''
+
+    title                   = 'El Tribuno Salta (Edición Impresa)'
+    __author__              = 'Darko Miletic'
+    description             = "Diario principal de Salta"
+    publisher               = 'Horizontes S.A.'
+    category                = 'news, politics, Salta, Argentina, World'
+    oldest_article          = 2
+    language                = 'es_AR'
+    max_articles_per_feed   = 250
+    no_stylesheets          = True
+    use_embedded_content    = False
+    encoding                = 'cp1252'
+    publication_type        = 'newspaper'
+    delay                   = 1
+    articles_are_obfuscated = True
+    temp_files              = []  # class-level list, shared by all instances
+    PREFIX                  = 'http://www.eltribuno.info/salta/'
+    INDEX                   = PREFIX + 'edicion_impresa.aspx'
+    PRINTURL                = PREFIX + 'nota_print.aspx?%s'
+
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : language
+                        , 'linearize_tables' : True
+                        }
+
+    keep_only_tags = [dict(name='div' , attrs={'class':['notaHead', 'notaContent']})]
+    remove_tags = [
+                     dict(name=['meta','iframe','base','object','embed','link','img']),
+                     dict(name='ul', attrs={'class':'Tabs'})
+                  ]
+
+    extra_css = """
+                body{font-family: Arial,Helvetica,sans-serif}
+                .notaHead h4{text-transform: uppercase; color: gray}
+                img{margin-top: 0.8em; display: block}
+                """
+
+    def parse_index(self):
+        '''Return [(section title, [article dicts])] read from INDEX, or [] on failure.'''
+        soup = None
+        for _attempt in range(5):
+            try:
+                soup = self.index_to_soup(self.INDEX)
+                break
+            except Exception:
+                self.log.warn('Retrying download...')
+        if soup is None:
+            return []
+        # find(href=True) only returns anchors that have an href, so the
+        # attribute can be read directly once a match was found.
+        alink = soup.find('a', href=True, attrs={'class':'ZoomTapa'})
+        if alink is not None:
+            self.cover_url = alink['href']
+        feeds = OrderedDict()
+        sections = soup.findAll('div', attrs={'id':lambda x: x and x.startswith('Ediciones')})
+        for section in sections:
+            section_title = 'Sin titulo'
+            sectiont = section.find('h3', attrs={'class':'NombreSeccion'})
+            if sectiont is not None:
+                section_title = self.tag_to_string(sectiont.span)
+            for article in section.findAll('div', attrs={'class':'Noticia NoticiaAB1'}):
+                try:
+                    link = article.div.h3.a
+                    url = link['href']
+                except (AttributeError, KeyError, TypeError):
+                    continue  # teaser lacks the div > h3 > a headline link; skip it
+                feeds.setdefault(section_title, []).append({
+                    'title': self.tag_to_string(link), 'url': url,
+                    'description': self.tag_to_string(article.p), 'date': ''})
+        return list(feeds.items())
+
+    def preprocess_html(self, soup):
+        '''Drop inline style attributes and replace each <a> tag with its text.'''
+        for item in soup.findAll(style=True):
+            del item['style']
+        for anchor in soup.findAll('a'):
+            text = anchor.string
+            if text is None:
+                text = self.tag_to_string(anchor)
+            anchor.replaceWith(text)
+        return soup
+
+    def get_masthead_title(self):
+        '''Return the fixed masthead text.'''
+        return 'El Tribuno'
+
+    def get_obfuscated_article(self, url):
+        '''Download url (up to 10 attempts) into a temp file; return its path.
+
+        Raises IOError when every attempt fails.
+        '''
+        html = None
+        for _attempt in range(10):
+            try:
+                html = self.browser.open(url).read()
+                break
+            except Exception:
+                self.log.warn('Retrying download...')
+        if html is None:
+            raise IOError('Failed to download ' + url)
+        tfile = PersistentTemporaryFile('_fa.html')
+        tfile.write(html)
+        tfile.close()
+        self.temp_files.append(tfile)
+        return tfile.name
+
+    def print_version(self, url):
+        '''Build the PRINTURL for the id between the last '/' and the next '-'.'''
+        artid = url.rpartition('/')[2].partition('-')[0]
+        return self.PRINTURL % urllib.urlencode({'Note': artid})
+
diff --git a/recipes/icons/eltribuno_jujuy_impreso.png b/recipes/icons/eltribuno_jujuy_impreso.png
new file mode 100644
index 0000000000000000000000000000000000000000..8862b78d0c77b95c435de73b4bf78265bcc95815
GIT binary patch
literal 592
zcmV-W0<ZmvP)<h;3K|Lk000e1NJLTq000mG000mO0ssI2kdbIM0006PNkl<Zc$`g9
zJx|+E6!m*Awi8<-fyki=ZjmBXhKdTYM3FjG@CPz5GBB|)^GDd27#LZQ*eZ1(0|>el
z0jV;e3a3F)l(;f+?f9$by$7X7%GLGFz2}{CFUhh@5kNu+$pk2+sHAC11Y{2qM5?^N
zbzRv_=yNei63n2|>2zMlBu%&5t=H?wQ5u|_eBa!hSzBvdUSfzdrzvGj&QQsXZt!t>
zRUNh4=E{nDbp-{z!^0nYd!UKqcrX}nc4SZ-1Ob$2y6)ZI-)wLH1~(z`)04Tp{OR^K
z2}AJa^LcK__kCoDSrmPposATQ31`r2uC6vOE>M@27-PDw(_AhWh9Nh^*k{i(78f<k
zV)OGDu(!4{VOnB3opKNOK~*9*!r0vUdJ@NPcXu;Y9k$yUW66KuYn98A9Ezs7=jXa@
z2aZ#0G_sfaKfnUt5==`1s2tzjjjyj~n#~VKN8_`zKMxP#<0BrI^oV@GFbsL1{^@Cy
zrtfxktX7MfCf59AY3W<51xox|C={Y7;#t4n#{iuCJUDo>vGIO?Kk|Kk7O^pc$s|E5
zmMP`XYPDLaRG=^RypPAnp*Rq~dZ9oKgXlW)VzKBr4!02tc%D}-mw)wonevJ>2o!OU
z%EAJ=YqgrHszmM+FJgK$8rimu7>d*4M3!X@heLdpN+on-D@59rDsdVlnx;L!kfxK#
eL>G|V(Ek_WCy$~!G;g^80000<MNUMnLSTaF5)Xs`

literal 0
HcmV?d00001

diff --git a/recipes/icons/eltribuno_salta_impreso.png b/recipes/icons/eltribuno_salta_impreso.png
new file mode 100644
index 0000000000000000000000000000000000000000..8862b78d0c77b95c435de73b4bf78265bcc95815
GIT binary patch
literal 592
zcmV-W0<ZmvP)<h;3K|Lk000e1NJLTq000mG000mO0ssI2kdbIM0006PNkl<Zc$`g9
zJx|+E6!m*Awi8<-fyki=ZjmBXhKdTYM3FjG@CPz5GBB|)^GDd27#LZQ*eZ1(0|>el
z0jV;e3a3F)l(;f+?f9$by$7X7%GLGFz2}{CFUhh@5kNu+$pk2+sHAC11Y{2qM5?^N
zbzRv_=yNei63n2|>2zMlBu%&5t=H?wQ5u|_eBa!hSzBvdUSfzdrzvGj&QQsXZt!t>
zRUNh4=E{nDbp-{z!^0nYd!UKqcrX}nc4SZ-1Ob$2y6)ZI-)wLH1~(z`)04Tp{OR^K
z2}AJa^LcK__kCoDSrmPposATQ31`r2uC6vOE>M@27-PDw(_AhWh9Nh^*k{i(78f<k
zV)OGDu(!4{VOnB3opKNOK~*9*!r0vUdJ@NPcXu;Y9k$yUW66KuYn98A9Ezs7=jXa@
z2aZ#0G_sfaKfnUt5==`1s2tzjjjyj~n#~VKN8_`zKMxP#<0BrI^oV@GFbsL1{^@Cy
zrtfxktX7MfCf59AY3W<51xox|C={Y7;#t4n#{iuCJUDo>vGIO?Kk|Kk7O^pc$s|E5
zmMP`XYPDLaRG=^RypPAnp*Rq~dZ9oKgXlW)VzKBr4!02tc%D}-mw)wonevJ>2o!OU
z%EAJ=YqgrHszmM+FJgK$8rimu7>d*4M3!X@heLdpN+on-D@59rDsdVlnx;L!kfxK#
eL>G|V(Ek_WCy$~!G;g^80000<MNUMnLSTaF5)Xs`

literal 0
HcmV?d00001