Updated weblogssl and The Age

This commit is contained in:
Kovid Goyal 2012-07-01 08:13:03 +05:30
parent d70cb9f6e2
commit d20bd1f8b1
2 changed files with 24 additions and 23 deletions

View File

@@ -18,7 +18,7 @@ class TheAge(BasicNewsRecipe):
publication_type = 'newspaper'
__author__ = 'Matthew Briggs'
language = 'en_AU'
max_articles_per_feed = 1000
recursions = 0
remove_tags = [dict(name=['table', 'script', 'noscript', 'style']), dict(name='a', attrs={'href':'/'}), dict(name='a', attrs={'href':'/text/'})]
@@ -47,18 +47,19 @@ class TheAge(BasicNewsRecipe):
if url.startswith('/'):
url = 'http://www.theage.com.au' + url
title = self.tag_to_string(tag)
sections[section].append({
'title': title,
'url' : url,
'date' : strftime('%a, %d %b'),
'description' : '',
'content' : '',
})
if url != 'http://www.theage.com.au':
sections[section].append({
'title': title,
'url' : url,
'date' : strftime('%a, %d %b'),
'description' : '',
'content' : '',
})
feeds = []
# Insert feeds in specified order, if available
feedSort = [ 'National', 'World', 'Opinion', 'Columns', 'Business', 'Sport', 'Entertainment' ]
for i in feedSort:
if i in sections:
@@ -68,12 +69,12 @@ class TheAge(BasicNewsRecipe):
for i in feedSort:
del sections[i]
# Append what is left over...
for i in sections:
feeds.append((i,sections[i]))
return feeds
def get_cover_url(self):
@@ -88,9 +89,9 @@ class TheAge(BasicNewsRecipe):
return None
def preprocess_html(self,soup):
for p in soup.findAll('p'):
# Collapse the paragraph by joining the non-tag contents
contents = [i for i in p.contents if isinstance(i,unicode)]
@@ -103,10 +104,10 @@ class TheAge(BasicNewsRecipe):
p.extract()
continue
# Shrink the fine print font
# Shrink the fine print font
if contents=='This material is subject to copyright and any unauthorised use, copying or mirroring is prohibited.':
p['style'] = 'font-size:small'
continue
continue
return soup

View File

@@ -2,8 +2,8 @@
__license__ = 'GPL v3'
__copyright__ = '4 February 2011, desUBIKado'
__author__ = 'desUBIKado'
__version__ = 'v0.07'
__date__ = '13, November 2011'
__version__ = 'v0.08'
__date__ = '30, June 2012'
'''
http://www.weblogssl.com/
'''
@@ -33,6 +33,7 @@ class weblogssl(BasicNewsRecipe):
feeds = [
(u'Xataka', u'http://feeds.weblogssl.com/xataka2')
,(u'Xataka Smart Home', u'http://feeds.weblogssl.com/Xatakahome')
,(u'Xataka Mexico', u'http://feeds.weblogssl.com/xatakamx')
,(u'Xataka M\xf3vil', u'http://feeds.weblogssl.com/xatakamovil')
,(u'Xataka Android', u'http://feeds.weblogssl.com/xatakandroid')
@@ -107,12 +108,14 @@ class weblogssl(BasicNewsRecipe):
# Para obtener la url original del articulo a partir de la de "feedsportal"
# El siguiente código es gracias al usuario "bosplans" de www.mobileread.com
# http://www.mobileread.com/forums/showthread.php?t=130297
# http://www.mobileread.com/forums/sho...d.php?t=130297
def get_article_url(self, article):
link = article.get('link', None)
if link is None:
return article
if link.split('/')[-4]=="xataka2":
return article.get('feedburner_origlink', article.get('link', article.get('guid')))
if link.split('/')[-1]=="story01.htm":
link=link.split('/')[-2]
a=['0B','0C','0D','0E','0F','0G','0N' ,'0L0S','0A']
@@ -121,6 +124,3 @@ class weblogssl(BasicNewsRecipe):
link=link.replace(a[i],b[i])
link="http://"+link
return link