diff --git a/recipes/the_age.recipe b/recipes/the_age.recipe index eddb5e5000..415ff0a25d 100644 --- a/recipes/the_age.recipe +++ b/recipes/the_age.recipe @@ -18,7 +18,7 @@ class TheAge(BasicNewsRecipe): publication_type = 'newspaper' __author__ = 'Matthew Briggs' language = 'en_AU' - + max_articles_per_feed = 1000 recursions = 0 remove_tags = [dict(name=['table', 'script', 'noscript', 'style']), dict(name='a', attrs={'href':'/'}), dict(name='a', attrs={'href':'/text/'})] @@ -47,18 +47,19 @@ class TheAge(BasicNewsRecipe): if url.startswith('/'): url = 'http://www.theage.com.au' + url title = self.tag_to_string(tag) - sections[section].append({ - 'title': title, - 'url' : url, - 'date' : strftime('%a, %d %b'), - 'description' : '', - 'content' : '', - }) - + if url != 'http://www.theage.com.au': + sections[section].append({ + 'title': title, + 'url' : url, + 'date' : strftime('%a, %d %b'), + 'description' : '', + 'content' : '', + }) + feeds = [] # Insert feeds in specified order, if available - + feedSort = [ 'National', 'World', 'Opinion', 'Columns', 'Business', 'Sport', 'Entertainment' ] for i in feedSort: if i in sections: @@ -68,12 +69,12 @@ class TheAge(BasicNewsRecipe): for i in feedSort: del sections[i] - + # Append what is left over... 
for i in sections: feeds.append((i,sections[i])) - + return feeds def get_cover_url(self): @@ -88,9 +89,9 @@ class TheAge(BasicNewsRecipe): return None def preprocess_html(self,soup): - + for p in soup.findAll('p'): - + # Collapse the paragraph by joining the non-tag contents contents = [i for i in p.contents if isinstance(i,unicode)] @@ -103,10 +104,10 @@ class TheAge(BasicNewsRecipe): p.extract() continue - # Shrink the fine print font + # Shrink the fine print font if contents=='This material is subject to copyright and any unauthorised use, copying or mirroring is prohibited.': p['style'] = 'font-size:small' - continue - + continue + return soup diff --git a/recipes/weblogs_sl.recipe b/recipes/weblogs_sl.recipe index e068544522..8622cccef8 100644 --- a/recipes/weblogs_sl.recipe +++ b/recipes/weblogs_sl.recipe @@ -2,8 +2,8 @@ __license__ = 'GPL v3' __copyright__ = '4 February 2011, desUBIKado' __author__ = 'desUBIKado' -__version__ = 'v0.07' -__date__ = '13, November 2011' +__version__ = 'v0.08' +__date__ = '30, June 2012' ''' http://www.weblogssl.com/ ''' @@ -33,6 +33,7 @@ class weblogssl(BasicNewsRecipe): feeds = [ (u'Xataka', u'http://feeds.weblogssl.com/xataka2') + ,(u'Xataka Smart Home', u'http://feeds.weblogssl.com/Xatakahome') ,(u'Xataka Mexico', u'http://feeds.weblogssl.com/xatakamx') ,(u'Xataka M\xf3vil', u'http://feeds.weblogssl.com/xatakamovil') ,(u'Xataka Android', u'http://feeds.weblogssl.com/xatakandroid') @@ -107,12 +108,14 @@ class weblogssl(BasicNewsRecipe): # Para obtener la url original del articulo a partir de la de "feedsportal" # El siguiente código es gracias al usuario "bosplans" de www.mobileread.com - # http://www.mobileread.com/forums/showthread.php?t=130297 + # http://www.mobileread.com/forums/showthread.php?t=130297 def get_article_url(self, article): link = article.get('link', None) if link is None: return article + if link.split('/')[-4]=="xataka2": + return article.get('feedburner_origlink', article.get('link', article.get('guid'))) 
if link.split('/')[-1]=="story01.htm": link=link.split('/')[-2] a=['0B','0C','0D','0E','0F','0G','0N' ,'0L0S','0A'] @@ -121,6 +124,3 @@ class weblogssl(BasicNewsRecipe): link=link.replace(a[i],b[i]) link="http://"+link return link - - -