Updated weblogssl and The Age

This commit is contained in:
Kovid Goyal 2012-07-01 08:13:03 +05:30
parent d70cb9f6e2
commit d20bd1f8b1
2 changed files with 24 additions and 23 deletions

View File

@@ -18,7 +18,7 @@ class TheAge(BasicNewsRecipe):
publication_type = 'newspaper'
__author__ = 'Matthew Briggs'
language = 'en_AU'
max_articles_per_feed = 1000
recursions = 0
remove_tags = [dict(name=['table', 'script', 'noscript', 'style']), dict(name='a', attrs={'href':'/'}), dict(name='a', attrs={'href':'/text/'})]
@@ -47,18 +47,19 @@ class TheAge(BasicNewsRecipe):
if url.startswith('/'):
url = 'http://www.theage.com.au' + url
title = self.tag_to_string(tag)
sections[section].append({
'title': title,
'url' : url,
'date' : strftime('%a, %d %b'),
'description' : '',
'content' : '',
})
if url != 'http://www.theage.com.au':
sections[section].append({
'title': title,
'url' : url,
'date' : strftime('%a, %d %b'),
'description' : '',
'content' : '',
})
feeds = []
# Insert feeds in specified order, if available
feedSort = [ 'National', 'World', 'Opinion', 'Columns', 'Business', 'Sport', 'Entertainment' ]
for i in feedSort:
if i in sections:
@@ -68,12 +69,12 @@ class TheAge(BasicNewsRecipe):
for i in feedSort:
del sections[i]
# Append what is left over...
for i in sections:
feeds.append((i,sections[i]))
return feeds
def get_cover_url(self):
@@ -88,9 +89,9 @@ class TheAge(BasicNewsRecipe):
return None
def preprocess_html(self,soup):
for p in soup.findAll('p'):
# Collapse the paragraph by joining the non-tag contents
contents = [i for i in p.contents if isinstance(i,unicode)]
@@ -103,10 +104,10 @@ class TheAge(BasicNewsRecipe):
p.extract()
continue
# Shrink the fine print font
# Shrink the fine print font
if contents=='This material is subject to copyright and any unauthorised use, copying or mirroring is prohibited.':
p['style'] = 'font-size:small'
continue
continue
return soup

View File

@@ -2,8 +2,8 @@
__license__ = 'GPL v3'
__copyright__ = '4 February 2011, desUBIKado'
__author__ = 'desUBIKado'
__version__ = 'v0.07'
__date__ = '13, November 2011'
__version__ = 'v0.08'
__date__ = '30, June 2012'
'''
http://www.weblogssl.com/
'''
@@ -33,6 +33,7 @@ class weblogssl(BasicNewsRecipe):
feeds = [
(u'Xataka', u'http://feeds.weblogssl.com/xataka2')
,(u'Xataka Smart Home', u'http://feeds.weblogssl.com/Xatakahome')
,(u'Xataka Mexico', u'http://feeds.weblogssl.com/xatakamx')
,(u'Xataka M\xf3vil', u'http://feeds.weblogssl.com/xatakamovil')
,(u'Xataka Android', u'http://feeds.weblogssl.com/xatakandroid')
@@ -107,12 +108,14 @@ class weblogssl(BasicNewsRecipe):
# Para obtener la url original del articulo a partir de la de "feedsportal"
# El siguiente código es gracias al usuario "bosplans" de www.mobileread.com
# http://www.mobileread.com/forums/showthread.php?t=130297
# http://www.mobileread.com/forums/sho...d.php?t=130297
def get_article_url(self, article):
link = article.get('link', None)
if link is None:
return article
if link.split('/')[-4]=="xataka2":
return article.get('feedburner_origlink', article.get('link', article.get('guid')))
if link.split('/')[-1]=="story01.htm":
link=link.split('/')[-2]
a=['0B','0C','0D','0E','0F','0G','0N' ,'0L0S','0A']
@@ -121,6 +124,3 @@ class weblogssl(BasicNewsRecipe):
link=link.replace(a[i],b[i])
link="http://"+link
return link