Update Weblogs SL

This commit is contained in:
Kovid Goyal 2012-12-04 09:26:40 +05:30
parent c1a75d585a
commit e26b9f770e

View File

@@ -2,8 +2,8 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '4 February 2011, desUBIKado' __copyright__ = '4 February 2011, desUBIKado'
__author__ = 'desUBIKado' __author__ = 'desUBIKado'
__version__ = 'v0.08' __version__ = 'v0.09'
__date__ = '30, June 2012' __date__ = '02, December 2012'
''' '''
http://www.weblogssl.com/ http://www.weblogssl.com/
''' '''
@@ -37,6 +37,7 @@ class weblogssl(BasicNewsRecipe):
,(u'Xataka Mexico', u'http://feeds.weblogssl.com/xatakamx') ,(u'Xataka Mexico', u'http://feeds.weblogssl.com/xatakamx')
,(u'Xataka M\xf3vil', u'http://feeds.weblogssl.com/xatakamovil') ,(u'Xataka M\xf3vil', u'http://feeds.weblogssl.com/xatakamovil')
,(u'Xataka Android', u'http://feeds.weblogssl.com/xatakandroid') ,(u'Xataka Android', u'http://feeds.weblogssl.com/xatakandroid')
,(u'Xataka Windows', u'http://feeds.weblogssl.com/xatakawindows')
,(u'Xataka Foto', u'http://feeds.weblogssl.com/xatakafoto') ,(u'Xataka Foto', u'http://feeds.weblogssl.com/xatakafoto')
,(u'Xataka ON', u'http://feeds.weblogssl.com/xatakaon') ,(u'Xataka ON', u'http://feeds.weblogssl.com/xatakaon')
,(u'Xataka Ciencia', u'http://feeds.weblogssl.com/xatakaciencia') ,(u'Xataka Ciencia', u'http://feeds.weblogssl.com/xatakaciencia')
@@ -80,19 +81,31 @@ class weblogssl(BasicNewsRecipe):
keep_only_tags = [dict(name='div', attrs={'id':'infoblock'}), keep_only_tags = [dict(name='div', attrs={'id':'infoblock'}),
dict(name='div', attrs={'class':'post'}), dict(name='div', attrs={'class':'post'}),
dict(name='div', attrs={'id':'blog-comments'}) dict(name='div', attrs={'id':'blog-comments'}),
dict(name='div', attrs={'class':'container'}) #m.xataka.com
] ]
remove_tags = [dict(name='div', attrs={'id':'comment-nav'})] remove_tags = [dict(name='div', attrs={'id':'comment-nav'}),
dict(name='menu', attrs={'class':'social-sharing'}), #m.xataka.com
dict(name='section' , attrs={'class':'comments'}), #m.xataka.com
dict(name='div' , attrs={'class':'article-comments'}), #m.xataka.com
dict(name='nav' , attrs={'class':'article-taxonomy'}) #m.xataka.com
]
remove_tags_after = dict(name='section' , attrs={'class':'comments'})
def print_version(self, url): def print_version(self, url):
return url.replace('http://www.', 'http://m.') return url.replace('http://www.', 'http://m.')
preprocess_regexps = [ preprocess_regexps = [
# Para poner una linea en blanco entre un comentario y el siguiente # Para poner una linea en blanco entre un comentario y el siguiente
(re.compile(r'<li id="c', re.DOTALL|re.IGNORECASE), lambda match: '<br><br><li id="c') (re.compile(r'<li id="c', re.DOTALL|re.IGNORECASE), lambda match: '<br><br><li id="c'),
# Para ver las imágenes en las noticias de m.xataka.com
(re.compile(r'<noscript>', re.DOTALL|re.IGNORECASE), lambda m: ''),
(re.compile(r'</noscript>', re.DOTALL|re.IGNORECASE), lambda m: '')
] ]
# Para sustituir el video incrustado de YouTube por una imagen # Para sustituir el video incrustado de YouTube por una imagen
def preprocess_html(self, soup): def preprocess_html(self, soup):
@@ -108,14 +121,16 @@ class weblogssl(BasicNewsRecipe):
# Para obtener la url original del articulo a partir de la de "feedsportal" # Para obtener la url original del articulo a partir de la de "feedsportal"
# El siguiente código es gracias al usuario "bosplans" de www.mobileread.com # El siguiente código es gracias al usuario "bosplans" de www.mobileread.com
# http://www.mobileread.com/forums/sho...d.php?t=130297 # http://www.mobileread.com/forums/showthread.php?t=130297
def get_article_url(self, article): def get_article_url(self, article):
link = article.get('link', None) link = article.get('link', None)
if link is None: if link is None:
return article return article
# if link.split('/')[-4]=="xataka2":
# return article.get('feedburner_origlink', article.get('link', article.get('guid')))
if link.split('/')[-4]=="xataka2": if link.split('/')[-4]=="xataka2":
return article.get('feedburner_origlink', article.get('link', article.get('guid'))) return article.get('guid', None)
if link.split('/')[-1]=="story01.htm": if link.split('/')[-1]=="story01.htm":
link=link.split('/')[-2] link=link.split('/')[-2]
a=['0B','0C','0D','0E','0F','0G','0N' ,'0L0S','0A'] a=['0B','0C','0D','0E','0F','0G','0N' ,'0L0S','0A']