ARG Noticias by Darko Miletic

Fixes #1249506 [New recipe for Argentinian news portal ArgNoticias](https://bugs.launchpad.net/calibre/+bug/1249506)
2025-06-23 15:30:45 -04:00 · 2013-11-09 10:44:10 +05:30 · 2013-11-09 10:44:10 +05:30 · aa0d4f1b71
commit aa0d4f1b71
parent c1e5cddf5f
2 changed files with 96 additions and 0 deletions
--- a/recipes/argnoticias.recipe
+++ b/recipes/argnoticias.recipe
@ -0,0 +1,96 @@
+
+__license__   = 'GPL v3'
+__copyright__ = '2013, Darko Miletic <darko.miletic at gmail.com>'
+
+'''
+www.argnoticias.com
+'''
+
+import time
+from calibre import strftime
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class ArgNoticias(BasicNewsRecipe):
+    """Recipe for the Argentinian news portal www.argnoticias.com."""
+
+    title                 = 'ARG Noticias'
+    __author__            = 'Darko Miletic'
+    description           = 'Ultimas noticias de Argentina'
+    publisher             = 'ARG Noticias'
+    category              = 'news, politics, Argentina'
+    oldest_article        = 2
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    encoding              = 'utf-8'
+    use_embedded_content  = False
+    masthead_url          = 'http://www.argnoticias.com/images/arg-logo-footer.png'
+    language              = 'es_AR'
+    publication_type      = 'newsportal'
+    INDEX                 = 'http://www.argnoticias.com'
+    extra_css             = ''
+
+    conversion_options = {
+        'comment'  : description,
+        'tags'     : category,
+        'publisher': publisher,
+        'language' : language,
+    }
+
+    # Keep only the article header, body and author block of each page.
+    keep_only_tags = [dict(name='div', attrs={'class':['itemHeader','itemBody','itemAuthorBlock']})]
+
+    remove_tags = [
+        dict(name=['object','link','base','iframe']),
+        dict(name='div', attrs={'class':['b2jsocial_parent','itemSocialSharing']}),
+    ]
+
+    # Section index pages; parse_index() scrapes them as HTML.
+    feeds = [
+        (u'Politica'    , u'http://www.argnoticias.com/index.php/politica'    ),
+        (u'Economia'    , u'http://www.argnoticias.com/index.php/economia'    ),
+        (u'Sociedad'    , u'http://www.argnoticias.com/index.php/sociedad'    ),
+        (u'Mundo'       , u'http://www.argnoticias.com/index.php/mundo'       ),
+        (u'Deportes'    , u'http://www.argnoticias.com/index.php/deportes'    ),
+        (u'Espectaculos', u'http://www.argnoticias.com/index.php/espectaculos'),
+        (u'Tendencias'  , u'http://www.argnoticias.com/index.php/tendencias'  ),
+    ]
+
+    def parse_index(self):
+        """Scrape every section page and return a list of (title, articles).
+
+        Each article is a dict with 'title', 'date', 'url' and 'description'.
+        A URL is listed once only, under the first section where it appears.
+        Items without a title link (or without an href) are skipped.
+        """
+        totalfeeds = []
+        seen = set()  # article URLs already collected, across all sections
+        # Every article is stamped with the current UTC time.
+        date = strftime("%a, %d %b %Y %H:%M:%S +0000",time.gmtime())
+        title_link = {'class':'moduleItemTitle'}
+        intro_text = {'class':'moduleItemIntrotext'}
+
+        for feedtitle, feedurl in self.get_feeds():
+            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
+            soup = self.index_to_soup(feedurl)
+            # Articles are listed both in div.Nota blocks and in <li> items.
+            items = list(soup.findAll('div', attrs={'class':'Nota'}))
+            items.extend(soup.findAll('li'))
+            articles = []
+            for item in items:
+                atag = item.find('a', attrs=title_link)
+                # Skip items with no title link or no href rather than failing.
+                if atag is None or not atag.get('href'):
+                    continue
+                url = self.INDEX + atag['href']
+                if url in seen:
+                    continue
+                seen.add(url)
+                ptag = item.find('div', attrs=intro_text)
+                articles.append({
+                    'title'      : self.tag_to_string(atag),
+                    'date'       : date,
+                    'url'        : url,
+                    'description': self.tag_to_string(ptag),
+                })
+            totalfeeds.append((feedtitle, articles))
+        return totalfeeds
--- a/recipes/icons/argnoticias.png
+++ b/recipes/icons/argnoticias.png