mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Perform PEP8 compliance checks on the entire codebase
Some bits of PEP 8 are turned off via setup.cfg
This commit is contained in:
parent
643977ffa6
commit
567040ee1e
@ -1,16 +1,16 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
##
|
##
|
||||||
## Title: Diario 10minutos.com.uy News and Sports Calibre Recipe
|
# Title: Diario 10minutos.com.uy News and Sports Calibre Recipe
|
||||||
## Contact: Carlos Alves - <carlos@carlosalves.info>
|
# Contact: Carlos Alves - <carlos@carlosalves.info>
|
||||||
##
|
##
|
||||||
## License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
|
# License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
|
||||||
## Copyright: Carlos Alves - <carlos@carlosalves.info>
|
# Copyright: Carlos Alves - <carlos@carlosalves.info>
|
||||||
##
|
##
|
||||||
## Written: September 2013
|
# Written: September 2013
|
||||||
## Last Edited: 2016-01-11
|
# Last Edited: 2016-01-11
|
||||||
##
|
##
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__author__ = '2016, Carlos Alves <carlos@carlosalves.info>'
|
__author__ = '2016, Carlos Alves <carlos@carlosalves.info>'
|
||||||
'''
|
'''
|
||||||
10minutos.com.uy
|
10minutos.com.uy
|
||||||
@ -18,29 +18,30 @@ __author__ = '2016, Carlos Alves <carlos@carlosalves.info>'
|
|||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class General(BasicNewsRecipe):
|
class General(BasicNewsRecipe):
|
||||||
title = '10minutos'
|
title = '10minutos'
|
||||||
__author__ = 'Carlos Alves'
|
__author__ = 'Carlos Alves'
|
||||||
description = 'Noticias de Salto - Uruguay'
|
description = 'Noticias de Salto - Uruguay'
|
||||||
tags = 'news, sports'
|
tags = 'news, sports'
|
||||||
language = 'es_UY'
|
language = 'es_UY'
|
||||||
timefmt = '[%a, %d %b, %Y]'
|
timefmt = '[%a, %d %b, %Y]'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
recursion = 5
|
recursion = 5
|
||||||
encoding = 'utf8'
|
encoding = 'utf8'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':'post-content'})]
|
keep_only_tags = [dict(name='div', attrs={'class': 'post-content'})]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'class':['hr', 'titlebar', 'navigation']}),
|
dict(name='div', attrs={'class': ['hr', 'titlebar', 'navigation']}),
|
||||||
dict(name='div', attrs={'class':'sharedaddy sd-sharing-enabled'}),
|
dict(name='div', attrs={'class': 'sharedaddy sd-sharing-enabled'}),
|
||||||
dict(name='p', attrs={'class':'post-meta'}),
|
dict(name='p', attrs={'class': 'post-meta'}),
|
||||||
dict(name=['object','link'])
|
dict(name=['object', 'link'])
|
||||||
]
|
]
|
||||||
|
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
|
h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
|
||||||
@ -49,8 +50,8 @@ class General(BasicNewsRecipe):
|
|||||||
p {font-family:Arial,Helvetica,sans-serif;}
|
p {font-family:Arial,Helvetica,sans-serif;}
|
||||||
'''
|
'''
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Articulos', u'http://10minutos.com.uy/?feed=rss2')
|
(u'Articulos', u'http://10minutos.com.uy/?feed=rss2')
|
||||||
]
|
]
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
return 'http://10minutos.com.uy/a/img/logo.png'
|
return 'http://10minutos.com.uy/a/img/logo.png'
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
##
|
##
|
||||||
## Last Edited: 2016-01-11 Carlos Alves <carlos@carlosalves.info>
|
# Last Edited: 2016-01-11 Carlos Alves <carlos@carlosalves.info>
|
||||||
##
|
##
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
|
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
|
||||||
'''
|
'''
|
||||||
180.com.uy
|
180.com.uy
|
||||||
@ -11,31 +11,32 @@ __author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
|
|||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class Noticias(BasicNewsRecipe):
|
class Noticias(BasicNewsRecipe):
|
||||||
title = '180.com.uy'
|
title = '180.com.uy'
|
||||||
__author__ = 'Gustavo Azambuja'
|
__author__ = 'Gustavo Azambuja'
|
||||||
description = 'Noticias de Uruguay'
|
description = 'Noticias de Uruguay'
|
||||||
language = 'es_UY'
|
language = 'es_UY'
|
||||||
timefmt = '[%a, %d %b, %Y]'
|
timefmt = '[%a, %d %b, %Y]'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
recursion = 5
|
recursion = 5
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
remove_tags_after = dict(name='article')
|
remove_tags_after = dict(name='article')
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='h3', attrs={'class':'title'}),
|
dict(name='h3', attrs={'class': 'title'}),
|
||||||
dict(name='div', attrs={'class':'copete'}),
|
dict(name='div', attrs={'class': 'copete'}),
|
||||||
dict(name='article', attrs={'class':'texto'})
|
dict(name='article', attrs={'class': 'texto'})
|
||||||
]
|
]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['object','link'])
|
dict(name=['object', 'link'])
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_attributes = ['width','height', 'style', 'font', 'color']
|
remove_attributes = ['width', 'height', 'style', 'font', 'color']
|
||||||
|
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
|
h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
|
||||||
@ -44,15 +45,13 @@ class Noticias(BasicNewsRecipe):
|
|||||||
p {font-family:Arial,Helvetica,sans-serif;}
|
p {font-family:Arial,Helvetica,sans-serif;}
|
||||||
'''
|
'''
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Titulares', u'http://www.180.com.uy/feed.php')
|
(u'Titulares', u'http://www.180.com.uy/feed.php')
|
||||||
]
|
]
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
@ -22,7 +22,7 @@ class E1843(BasicNewsRecipe):
|
|||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='h1', attrs={'class':'title'}),
|
dict(name='h1', attrs={'class': 'title'}),
|
||||||
classes('field-name-field-rubric-summary article-header__overlay-main-image meta-info__author article__body'),
|
classes('field-name-field-rubric-summary article-header__overlay-main-image meta-info__author article__body'),
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -54,7 +54,8 @@ class E1843(BasicNewsRecipe):
|
|||||||
r = div.find(**classes('article-rubric'))
|
r = div.find(**classes('article-rubric'))
|
||||||
if r is not None:
|
if r is not None:
|
||||||
desc = self.tag_to_string(r)
|
desc = self.tag_to_string(r)
|
||||||
articles.append({'title':title, 'url':url, 'description':desc})
|
articles.append(
|
||||||
|
{'title': title, 'url': url, 'description': desc})
|
||||||
|
|
||||||
if current_section and articles:
|
if current_section and articles:
|
||||||
ans.append((current_section, articles))
|
ans.append((current_section, articles))
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__author__ = 'Luis Hernandez'
|
__author__ = 'Luis Hernandez'
|
||||||
__copyright__ = 'Luis Hernandez<tolyluis@gmail.com>'
|
__copyright__ = 'Luis Hernandez<tolyluis@gmail.com>'
|
||||||
__version__ = 'v0.85'
|
__version__ = 'v0.85'
|
||||||
__date__ = '31 January 2011'
|
__date__ = '31 January 2011'
|
||||||
|
|
||||||
'''
|
'''
|
||||||
www.20minutos.es
|
www.20minutos.es
|
||||||
@ -10,46 +10,39 @@ www.20minutos.es
|
|||||||
import re
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1294946868(BasicNewsRecipe):
|
class AdvancedUserRecipe1294946868(BasicNewsRecipe):
|
||||||
|
|
||||||
title = u'20 Minutos new'
|
title = u'20 Minutos new'
|
||||||
publisher = u'Grupo 20 Minutos'
|
publisher = u'Grupo 20 Minutos'
|
||||||
|
|
||||||
__author__ = 'Luis Hernandez'
|
__author__ = 'Luis Hernandez'
|
||||||
description = 'Free spanish newspaper'
|
description = 'Free spanish newspaper'
|
||||||
cover_url = 'http://estaticos.20minutos.es/mmedia/especiales/corporativo/css/img/logotipos_grupo20minutos.gif'
|
cover_url = 'http://estaticos.20minutos.es/mmedia/especiales/corporativo/css/img/logotipos_grupo20minutos.gif'
|
||||||
|
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
|
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
|
||||||
encoding = 'ISO-8859-1'
|
encoding = 'ISO-8859-1'
|
||||||
language = 'es'
|
language = 'es'
|
||||||
timefmt = '[%a, %d %b, %Y]'
|
timefmt = '[%a, %d %b, %Y]'
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'id':['content','vinetas',]})
|
dict(name='div', attrs={'id': ['content', 'vinetas', ]}), dict(name='div', attrs={'class': ['boxed', 'description', 'lead', 'article-content', 'cuerpo estirar']}), dict(name='span', attrs={'class': ['photo-bar']}), dict(name='ul', attrs={'class': ['article-author']}) # noqa
|
||||||
,dict(name='div', attrs={'class':['boxed','description','lead','article-content','cuerpo estirar']})
|
]
|
||||||
,dict(name='span', attrs={'class':['photo-bar']})
|
|
||||||
,dict(name='ul', attrs={'class':['article-author']})
|
|
||||||
]
|
|
||||||
|
|
||||||
remove_tags_before = dict(name='ul' , attrs={'class':['servicios-sub']})
|
remove_tags_before = dict(name='ul', attrs={'class': ['servicios-sub']})
|
||||||
remove_tags_after = dict(name='div' , attrs={'class':['related-news','col']})
|
remove_tags_after = dict(
|
||||||
|
name='div', attrs={'class': ['related-news', 'col']})
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='ol', attrs={'class':['navigation',]})
|
dict(name='ol', attrs={'class': ['navigation', ]}), dict(name='span', attrs={'class': ['action']}), dict(name='div', attrs={'class': ['twitter comments-list hidden', 'related-news', 'col', 'photo-gallery', 'photo-gallery side-art-block', 'calendario', 'article-comment', 'postto estirar', 'otras_vinetas estirar', 'kment', 'user-actions']}), dict( name='div', attrs={'id': ['twitter-destacados', 'eco-tabs', 'inner', 'vineta_calendario', 'vinetistas clearfix', 'otras_vinetas estirar', 'MIN1', 'main', 'SUP1', 'INT']}), dict(name='ul', attrs={'class': ['article-user-actions', 'stripped-list']}), dict(name='ul', attrs={'id': ['site-links']}), dict(name='li', attrs={'class': ['puntuacion', 'enviar', 'compartir']}) # noqa
|
||||||
,dict(name='span', attrs={'class':['action']})
|
]
|
||||||
,dict(name='div', attrs={'class':['twitter comments-list hidden','related-news','col','photo-gallery','photo-gallery side-art-block','calendario','article-comment','postto estirar','otras_vinetas estirar','kment','user-actions']})
|
|
||||||
,dict(name='div', attrs={'id':['twitter-destacados','eco-tabs','inner','vineta_calendario','vinetistas clearfix','otras_vinetas estirar','MIN1','main','SUP1','INT']})
|
|
||||||
,dict(name='ul', attrs={'class':['article-user-actions','stripped-list']})
|
|
||||||
,dict(name='ul', attrs={'id':['site-links']})
|
|
||||||
,dict(name='li', attrs={'class':['puntuacion','enviar','compartir']})
|
|
||||||
]
|
|
||||||
|
|
||||||
extra_css = """
|
extra_css = """
|
||||||
p{text-align: justify; font-size: 100%}
|
p{text-align: justify; font-size: 100%}
|
||||||
@ -57,23 +50,25 @@ class AdvancedUserRecipe1294946868(BasicNewsRecipe):
|
|||||||
h3{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
|
h3{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
|
||||||
"""
|
"""
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(r'<a href="http://estaticos.*?[0-999]px;" target="_blank">', re.DOTALL), lambda m: '')]
|
preprocess_regexps = [(re.compile(
|
||||||
|
r'<a href="http://estaticos.*?[0-999]px;" target="_blank">', re.DOTALL), lambda m: '')]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Portada' , u'http://www.20minutos.es/rss/')
|
|
||||||
,(u'Nacional' , u'http://www.20minutos.es/rss/nacional/')
|
(u'Portada', u'http://www.20minutos.es/rss/'),
|
||||||
,(u'Internacional' , u'http://www.20minutos.es/rss/internacional/')
|
(u'Nacional', u'http://www.20minutos.es/rss/nacional/'),
|
||||||
,(u'Economia' , u'http://www.20minutos.es/rss/economia/')
|
(u'Internacional', u'http://www.20minutos.es/rss/internacional/'),
|
||||||
,(u'Deportes' , u'http://www.20minutos.es/rss/deportes/')
|
(u'Economia', u'http://www.20minutos.es/rss/economia/'),
|
||||||
,(u'Tecnologia' , u'http://www.20minutos.es/rss/tecnologia/')
|
(u'Deportes', u'http://www.20minutos.es/rss/deportes/'),
|
||||||
,(u'Gente - TV' , u'http://www.20minutos.es/rss/gente-television/')
|
(u'Tecnologia', u'http://www.20minutos.es/rss/tecnologia/'),
|
||||||
,(u'Motor' , u'http://www.20minutos.es/rss/motor/')
|
(u'Gente - TV', u'http://www.20minutos.es/rss/gente-television/'),
|
||||||
,(u'Salud' , u'http://www.20minutos.es/rss/belleza-y-salud/')
|
(u'Motor', u'http://www.20minutos.es/rss/motor/'),
|
||||||
,(u'Viajes' , u'http://www.20minutos.es/rss/viajes/')
|
(u'Salud', u'http://www.20minutos.es/rss/belleza-y-salud/'),
|
||||||
,(u'Vivienda' , u'http://www.20minutos.es/rss/vivienda/')
|
(u'Viajes', u'http://www.20minutos.es/rss/viajes/'),
|
||||||
,(u'Empleo' , u'http://www.20minutos.es/rss/empleo/')
|
(u'Vivienda', u'http://www.20minutos.es/rss/vivienda/'),
|
||||||
,(u'Cine' , u'http://www.20minutos.es/rss/cine/')
|
(u'Empleo', u'http://www.20minutos.es/rss/empleo/'),
|
||||||
,(u'Musica' , u'http://www.20minutos.es/rss/musica/')
|
(u'Cine', u'http://www.20minutos.es/rss/cine/'),
|
||||||
,(u'Vinetas' , u'http://www.20minutos.es/rss/vinetas/')
|
(u'Musica', u'http://www.20minutos.es/rss/musica/'),
|
||||||
,(u'Comunidad20' , u'http://www.20minutos.es/rss/zona20/')
|
(u'Vinetas', u'http://www.20minutos.es/rss/vinetas/'),
|
||||||
]
|
(u'Comunidad20', u'http://www.20minutos.es/rss/zona20/')
|
||||||
|
]
|
||||||
|
@ -1,33 +1,34 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
|
__copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
|
||||||
'''
|
'''
|
||||||
20minutes.fr
|
20minutes.fr
|
||||||
'''
|
'''
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class Minutes(BasicNewsRecipe):
|
class Minutes(BasicNewsRecipe):
|
||||||
|
|
||||||
title = '20 minutes'
|
title = '20 minutes'
|
||||||
__author__ = u'Aurélien Chabot'
|
__author__ = u'Aurélien Chabot'
|
||||||
description = 'Actualités'
|
description = 'Actualités'
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
publisher = '20minutes.fr'
|
publisher = '20minutes.fr'
|
||||||
category = 'Actualités, France, Monde'
|
category = 'Actualités, France, Monde'
|
||||||
language = 'fr'
|
language = 'fr'
|
||||||
|
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
timefmt = ' [%d %b %Y]'
|
timefmt = ' [%d %b %Y]'
|
||||||
max_articles_per_feed = 15
|
max_articles_per_feed = 15
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='h1'),
|
dict(name='h1'),
|
||||||
dict(attrs={'class':lambda x: x and 'lt-content' in x.split()}),
|
dict(attrs={'class': lambda x: x and 'lt-content' in x.split()}),
|
||||||
]
|
]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(attrs={'class':lambda x:x and 'content-related' in x.split()}),
|
dict(attrs={'class': lambda x: x and 'content-related' in x.split()}),
|
||||||
]
|
]
|
||||||
remove_tags_after = dict(id='ob_holder')
|
remove_tags_after = dict(id='ob_holder')
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
www.20minutos.es
|
www.20minutos.es
|
||||||
@ -6,45 +6,44 @@ www.20minutos.es
|
|||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class t20Minutos(BasicNewsRecipe):
|
class t20Minutos(BasicNewsRecipe):
|
||||||
title = '20 Minutos'
|
title = '20 Minutos'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Diario de informacion general y local mas leido de Espania, noticias de ultima hora de Espania, el mundo, local, deportes, noticias curiosas y mas'
|
description = 'Diario de informacion general y local mas leido de Espania, noticias de ultima hora de Espania, el mundo, local, deportes, noticias curiosas y mas' # noqa
|
||||||
publisher = '20 Minutos Online SL'
|
publisher = '20 Minutos Online SL'
|
||||||
category = 'news, politics, Spain'
|
category = 'news, politics, Spain'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 200
|
max_articles_per_feed = 200
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'utf8'
|
encoding = 'utf8'
|
||||||
use_embedded_content = True
|
use_embedded_content = True
|
||||||
language = 'es'
|
language = 'es'
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
publication_type = 'newspaper'
|
publication_type = 'newspaper'
|
||||||
masthead_url = 'http://estaticos.20minutos.es/css4/img/ui/logo-301x54.png'
|
masthead_url = 'http://estaticos.20minutos.es/css4/img/ui/logo-301x54.png'
|
||||||
extra_css = """
|
extra_css = """
|
||||||
body{font-family: Arial,Helvetica,sans-serif }
|
body{font-family: Arial,Helvetica,sans-serif }
|
||||||
img{margin-bottom: 0.4em; display:block}
|
img{margin-bottom: 0.4em; display:block}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||||
, 'tags' : category
|
}
|
||||||
, 'publisher' : publisher
|
|
||||||
, 'language' : language
|
|
||||||
}
|
|
||||||
|
|
||||||
remove_tags = [dict(attrs={'class':'mf-viral'})]
|
remove_tags = [dict(attrs={'class': 'mf-viral'})]
|
||||||
remove_attributes=['border']
|
remove_attributes = ['border']
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Principal' , u'http://20minutos.feedsportal.com/c/32489/f/478284/index.rss')
|
|
||||||
,(u'Cine' , u'http://20minutos.feedsportal.com/c/32489/f/478285/index.rss')
|
(u'Principal', u'http://20minutos.feedsportal.com/c/32489/f/478284/index.rss'),
|
||||||
,(u'Internacional' , u'http://20minutos.feedsportal.com/c/32489/f/492689/index.rss')
|
(u'Cine', u'http://20minutos.feedsportal.com/c/32489/f/478285/index.rss'),
|
||||||
,(u'Deportes' , u'http://20minutos.feedsportal.com/c/32489/f/478286/index.rss')
|
(u'Internacional', u'http://20minutos.feedsportal.com/c/32489/f/492689/index.rss'),
|
||||||
,(u'Nacional' , u'http://20minutos.feedsportal.com/c/32489/f/492688/index.rss')
|
(u'Deportes', u'http://20minutos.feedsportal.com/c/32489/f/478286/index.rss'),
|
||||||
,(u'Economia' , u'http://20minutos.feedsportal.com/c/32489/f/492690/index.rss')
|
(u'Nacional', u'http://20minutos.feedsportal.com/c/32489/f/492688/index.rss'),
|
||||||
,(u'Tecnologia' , u'http://20minutos.feedsportal.com/c/32489/f/478292/index.rss')
|
(u'Economia', u'http://20minutos.feedsportal.com/c/32489/f/492690/index.rss'),
|
||||||
]
|
(u'Tecnologia', u'http://20minutos.feedsportal.com/c/32489/f/478292/index.rss')
|
||||||
|
]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
@ -52,17 +51,16 @@ class t20Minutos(BasicNewsRecipe):
|
|||||||
for item in soup.findAll('a'):
|
for item in soup.findAll('a'):
|
||||||
limg = item.find('img')
|
limg = item.find('img')
|
||||||
if item.string is not None:
|
if item.string is not None:
|
||||||
str = item.string
|
str = item.string
|
||||||
item.replaceWith(str)
|
item.replaceWith(str)
|
||||||
else:
|
else:
|
||||||
if limg:
|
if limg:
|
||||||
item.name = 'div'
|
item.name = 'div'
|
||||||
item.attrs = []
|
item.attrs = []
|
||||||
else:
|
else:
|
||||||
str = self.tag_to_string(item)
|
str = self.tag_to_string(item)
|
||||||
item.replaceWith(str)
|
item.replaceWith(str)
|
||||||
for item in soup.findAll('img'):
|
for item in soup.findAll('img'):
|
||||||
if not item.has_key('alt'):
|
if not item.has_key('alt'): # noqa
|
||||||
item['alt'] = 'image'
|
item['alt'] = 'image'
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
|
||||||
'''
|
'''
|
||||||
@ -11,51 +11,50 @@ import re
|
|||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import Tag
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
|
||||||
|
|
||||||
class Cro24Sata(BasicNewsRecipe):
|
class Cro24Sata(BasicNewsRecipe):
|
||||||
title = '24 Sata - Hr'
|
title = '24 Sata - Hr'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = "News Portal from Croatia"
|
description = "News Portal from Croatia"
|
||||||
publisher = '24sata.hr'
|
publisher = '24sata.hr'
|
||||||
category = 'news, politics, Croatia'
|
category = 'news, politics, Croatia'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
delay = 4
|
delay = 4
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'hr'
|
language = 'hr'
|
||||||
|
|
||||||
lang = 'hr-HR'
|
lang = 'hr-HR'
|
||||||
|
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}' # noqa
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment': description, 'tags': category, 'publisher': publisher, 'language': lang, 'pretty_print': True
|
||||||
, 'tags' : category
|
}
|
||||||
, 'publisher' : publisher
|
|
||||||
, 'language' : lang
|
|
||||||
, 'pretty_print' : True
|
|
||||||
}
|
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['object','link','embed'])
|
dict(name=['object', 'link', 'embed']), dict(
|
||||||
,dict(name='table', attrs={'class':'enumbox'})
|
name='table', attrs={'class': 'enumbox'})
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [(u'Najnovije Vijesti', u'http://www.24sata.hr/index.php?cmd=show_rss&action=novo')]
|
feeds = [(u'Najnovije Vijesti',
|
||||||
|
u'http://www.24sata.hr/index.php?cmd=show_rss&action=novo')]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
soup.html['lang'] = self.lang
|
soup.html['lang'] = self.lang
|
||||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
mlang = Tag(soup, 'meta', [
|
||||||
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
|
("http-equiv", "Content-Language"), ("content", self.lang)])
|
||||||
soup.head.insert(0,mlang)
|
mcharset = Tag(soup, 'meta', [
|
||||||
soup.head.insert(1,mcharset)
|
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
|
||||||
|
soup.head.insert(0, mlang)
|
||||||
|
soup.head.insert(1, mcharset)
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url + '&action=ispis'
|
return url + '&action=ispis'
|
||||||
|
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009-2012, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009-2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
|
||||||
'''
|
'''
|
||||||
@ -10,40 +10,38 @@ __copyright__ = '2009-2012, Darko Miletic <darko.miletic at gmail.com>'
|
|||||||
import re
|
import re
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class Ser24Sata(BasicNewsRecipe):
|
class Ser24Sata(BasicNewsRecipe):
|
||||||
title = '24 Sata - Sr'
|
title = '24 Sata - Sr'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = '24 sata portal vesti iz Srbije'
|
description = '24 sata portal vesti iz Srbije'
|
||||||
publisher = 'Ringier d.o.o.'
|
publisher = 'Ringier d.o.o.'
|
||||||
category = 'news, politics, entertainment, Serbia'
|
category = 'news, politics, entertainment, Serbia'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'sr'
|
language = 'sr'
|
||||||
publication_type = 'newsportal'
|
publication_type = 'newsportal'
|
||||||
extra_css = """
|
extra_css = """
|
||||||
@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
|
@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
|
||||||
body{font-family: serif1, serif}
|
body{font-family: serif1, serif}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||||
, 'tags' : category
|
}
|
||||||
, 'publisher': publisher
|
|
||||||
, 'language' : language
|
|
||||||
}
|
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Vesti' , u'http://www.24sata.rs/rss/vesti.xml' ),
|
(u'Vesti', u'http://www.24sata.rs/rss/vesti.xml'),
|
||||||
(u'Sport' , u'http://www.24sata.rs/rss/sport.xml' ),
|
(u'Sport', u'http://www.24sata.rs/rss/sport.xml'),
|
||||||
(u'Šou' , u'http://www.24sata.rs/rss/sou.xml' ),
|
(u'Šou', u'http://www.24sata.rs/rss/sou.xml'),
|
||||||
(u'Specijal', u'http://www.24sata.rs/rss/specijal.xml'),
|
(u'Specijal', u'http://www.24sata.rs/rss/specijal.xml'),
|
||||||
(u'Novi Sad', u'http://www.24sata.rs/rss/ns.xml' )
|
(u'Novi Sad', u'http://www.24sata.rs/rss/ns.xml')
|
||||||
]
|
]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
dpart, spart, apart = url.rpartition('/')
|
dpart, spart, apart = url.rpartition('/')
|
||||||
|
@ -3,44 +3,63 @@
|
|||||||
from __future__ import unicode_literals, division, absolute_import, print_function
|
from __future__ import unicode_literals, division, absolute_import, print_function
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1438446837(BasicNewsRecipe):
|
class AdvancedUserRecipe1438446837(BasicNewsRecipe):
|
||||||
title = '3DNews: Daily Digital Digest'
|
title = '3DNews: Daily Digital Digest'
|
||||||
__author__ = 'bugmen00t'
|
__author__ = 'bugmen00t'
|
||||||
description = 'Независимое российское онлайн-издание, посвященное цифровым технологиям'
|
description = 'Независимое российское онлайн-издание, посвященное цифровым технологиям'
|
||||||
publisher = '3DNews'
|
publisher = '3DNews'
|
||||||
category = 'news'
|
category = 'news'
|
||||||
cover_url = u'http://www.3dnews.ru/assets/images/logo.png'
|
cover_url = u'http://www.3dnews.ru/assets/images/logo.png'
|
||||||
language = 'ru'
|
language = 'ru'
|
||||||
auto_cleanup = True
|
auto_cleanup = True
|
||||||
|
|
||||||
oldest_article = 15
|
oldest_article = 15
|
||||||
max_articles_per_feed = 60
|
max_articles_per_feed = 60
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
('\u041d\u043e\u0432\u043e\u0441\u0442\u0438 Hardware', 'http://www.3dnews.ru/news/rss/'),
|
('\u041d\u043e\u0432\u043e\u0441\u0442\u0438 Hardware',
|
||||||
('\u041d\u043e\u0432\u043e\u0441\u0442\u0438 Software', 'http://www.3dnews.ru/software-news/rss/'),
|
'http://www.3dnews.ru/news/rss/'),
|
||||||
('\u0423\u043c\u043d\u044b\u0435 \u0432\u0435\u0449\u0438', 'http://www.3dnews.ru/smart-things/rss/'),
|
('\u041d\u043e\u0432\u043e\u0441\u0442\u0438 Software',
|
||||||
('\u0410\u043d\u0430\u043b\u0438\u0442\u0438\u043a\u0430', 'http://www.3dnews.ru/editorial/rss/'),
|
'http://www.3dnews.ru/software-news/rss/'),
|
||||||
('\u041f\u0440\u043e\u0446\u0435\u0441\u0441\u043e\u0440\u044b \u0438 \u043f\u0430\u043c\u044f\u0442\u044c', 'http://www.3dnews.ru/cpu/rss/'),
|
('\u0423\u043c\u043d\u044b\u0435 \u0432\u0435\u0449\u0438',
|
||||||
('\u041c\u0430\u0442\u0435\u0440\u0438\u043d\u0441\u043a\u0438\u0435 \u043f\u043b\u0430\u0442\u044b', 'http://www.3dnews.ru/motherboard/rss/'),
|
'http://www.3dnews.ru/smart-things/rss/'),
|
||||||
|
('\u0410\u043d\u0430\u043b\u0438\u0442\u0438\u043a\u0430',
|
||||||
|
'http://www.3dnews.ru/editorial/rss/'),
|
||||||
|
('\u041f\u0440\u043e\u0446\u0435\u0441\u0441\u043e\u0440\u044b \u0438 \u043f\u0430\u043c\u044f\u0442\u044c',
|
||||||
|
'http://www.3dnews.ru/cpu/rss/'),
|
||||||
|
('\u041c\u0430\u0442\u0435\u0440\u0438\u043d\u0441\u043a\u0438\u0435 \u043f\u043b\u0430\u0442\u044b',
|
||||||
|
'http://www.3dnews.ru/motherboard/rss/'),
|
||||||
('\u041a\u043e\u0440\u043f\u0443\u0441\u0430, \u0411\u041f \u0438 \u043e\u0445\u043b\u0430\u0436\u0434\u0435\u043d\u0438\u0435',
|
('\u041a\u043e\u0440\u043f\u0443\u0441\u0430, \u0411\u041f \u0438 \u043e\u0445\u043b\u0430\u0436\u0434\u0435\u043d\u0438\u0435',
|
||||||
'http://www.3dnews.ru/cooling/rss/'),
|
'http://www.3dnews.ru/cooling/rss/'),
|
||||||
('\u0412\u0438\u0434\u0435\u043e\u043a\u0430\u0440\u0442\u044b', 'http://www.3dnews.ru/video/rss/'),
|
('\u0412\u0438\u0434\u0435\u043e\u043a\u0430\u0440\u0442\u044b',
|
||||||
('\u041c\u043e\u043d\u0438\u0442\u043e\u0440\u044b \u0438 \u043f\u0440\u043e\u0435\u043a\u0442\u043e\u0440\u044b', 'http://www.3dnews.ru/display/rss/'),
|
'http://www.3dnews.ru/video/rss/'),
|
||||||
('\u041d\u0430\u043a\u043e\u043f\u0438\u0442\u0435\u043b\u0438', 'http://www.3dnews.ru/storage/rss/'),
|
('\u041c\u043e\u043d\u0438\u0442\u043e\u0440\u044b \u0438 \u043f\u0440\u043e\u0435\u043a\u0442\u043e\u0440\u044b',
|
||||||
('\u0426\u0438\u0444\u0440\u043e\u0432\u043e\u0439 \u0430\u0432\u0442\u043e\u043c\u043e\u0431\u0438\u043b\u044c', 'http://www.3dnews.ru/auto/rss/'),
|
'http://www.3dnews.ru/display/rss/'),
|
||||||
('\u0421\u043e\u0442\u043e\u0432\u0430\u044f \u0441\u0432\u044f\u0437\u044c', 'http://www.3dnews.ru/phone/rss/'),
|
('\u041d\u0430\u043a\u043e\u043f\u0438\u0442\u0435\u043b\u0438',
|
||||||
('\u041f\u0435\u0440\u0438\u0444\u0435\u0440\u0438\u044f', 'http://www.3dnews.ru/peripheral/rss/'),
|
'http://www.3dnews.ru/storage/rss/'),
|
||||||
('\u041d\u043e\u0443\u0442\u0431\u0443\u043a\u0438 \u0438 \u041f\u041a', 'http://www.3dnews.ru/mobile/rss/'),
|
('\u0426\u0438\u0444\u0440\u043e\u0432\u043e\u0439 \u0430\u0432\u0442\u043e\u043c\u043e\u0431\u0438\u043b\u044c',
|
||||||
('\u041f\u043b\u0430\u043d\u0448\u0435\u0442\u044b', 'http://www.3dnews.ru/tablets/rss/'),
|
'http://www.3dnews.ru/auto/rss/'),
|
||||||
('\u0417\u0432\u0443\u043a \u0438 \u0430\u043a\u0443\u0441\u0442\u0438\u043a\u0430', 'http://www.3dnews.ru/multimedia/rss/'),
|
('\u0421\u043e\u0442\u043e\u0432\u0430\u044f \u0441\u0432\u044f\u0437\u044c',
|
||||||
|
'http://www.3dnews.ru/phone/rss/'),
|
||||||
|
('\u041f\u0435\u0440\u0438\u0444\u0435\u0440\u0438\u044f',
|
||||||
|
'http://www.3dnews.ru/peripheral/rss/'),
|
||||||
|
('\u041d\u043e\u0443\u0442\u0431\u0443\u043a\u0438 \u0438 \u041f\u041a',
|
||||||
|
'http://www.3dnews.ru/mobile/rss/'),
|
||||||
|
('\u041f\u043b\u0430\u043d\u0448\u0435\u0442\u044b',
|
||||||
|
'http://www.3dnews.ru/tablets/rss/'),
|
||||||
|
('\u0417\u0432\u0443\u043a \u0438 \u0430\u043a\u0443\u0441\u0442\u0438\u043a\u0430',
|
||||||
|
'http://www.3dnews.ru/multimedia/rss/'),
|
||||||
('\u0426\u0438\u0444\u0440\u043e\u0432\u043e\u0435 \u0444\u043e\u0442\u043e \u0438 \u0432\u0438\u0434\u0435\u043e',
|
('\u0426\u0438\u0444\u0440\u043e\u0432\u043e\u0435 \u0444\u043e\u0442\u043e \u0438 \u0432\u0438\u0434\u0435\u043e',
|
||||||
'http://www.3dnews.ru/digital/rss/'),
|
'http://www.3dnews.ru/digital/rss/'),
|
||||||
('\u0421\u0435\u0442\u0438 \u0438 \u043a\u043e\u043c\u043c\u0443\u043d\u0438\u043a\u0430\u0446\u0438\u0438', 'http://www.3dnews.ru/communication/rss/'),
|
('\u0421\u0435\u0442\u0438 \u0438 \u043a\u043e\u043c\u043c\u0443\u043d\u0438\u043a\u0430\u0446\u0438\u0438',
|
||||||
|
'http://www.3dnews.ru/communication/rss/'),
|
||||||
('\u0418\u0433\u0440\u044b', 'http://www.3dnews.ru/games/rss/'),
|
('\u0418\u0433\u0440\u044b', 'http://www.3dnews.ru/games/rss/'),
|
||||||
('\u041f\u0440\u043e\u0433\u0440\u0430\u043c\u043c\u043d\u043e\u0435 \u043e\u0431\u0435\u0441\u043f\u0435\u0447\u0435\u043d\u0438\u0435',
|
('\u041f\u0440\u043e\u0433\u0440\u0430\u043c\u043c\u043d\u043e\u0435 \u043e\u0431\u0435\u0441\u043f\u0435\u0447\u0435\u043d\u0438\u0435',
|
||||||
'http://www.3dnews.ru/software/rss/'),
|
'http://www.3dnews.ru/software/rss/'),
|
||||||
('Off-\u0441\u044f\u043d\u043a\u0430', 'http://www.3dnews.ru/offsyanka/rss/'),
|
('Off-\u0441\u044f\u043d\u043a\u0430',
|
||||||
('\u041c\u0430\u0441\u0442\u0435\u0440\u0441\u043a\u0430\u044f', 'http://www.3dnews.ru/workshop/rss/'),
|
'http://www.3dnews.ru/offsyanka/rss/'),
|
||||||
|
('\u041c\u0430\u0441\u0442\u0435\u0440\u0441\u043a\u0430\u044f',
|
||||||
|
'http://www.3dnews.ru/workshop/rss/'),
|
||||||
('ServerNews', 'http://servernews.ru/rss'),
|
('ServerNews', 'http://servernews.ru/rss'),
|
||||||
]
|
]
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
elargentino.com
|
elargentino.com
|
||||||
@ -9,33 +9,26 @@ elargentino.com
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import Tag
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
|
||||||
|
|
||||||
class SieteDias(BasicNewsRecipe):
|
class SieteDias(BasicNewsRecipe):
|
||||||
title = '7 dias'
|
title = '7 dias'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Revista Argentina'
|
description = 'Revista Argentina'
|
||||||
publisher = 'ElArgentino.com'
|
publisher = 'ElArgentino.com'
|
||||||
category = 'news, politics, show, Argentina'
|
category = 'news, politics, show, Argentina'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
language = 'es_AR'
|
language = 'es_AR'
|
||||||
|
|
||||||
lang = 'es-AR'
|
lang = 'es-AR'
|
||||||
direction = 'ltr'
|
direction = 'ltr'
|
||||||
INDEX = 'http://www.elargentino.com/medios/125/7-Dias.html'
|
INDEX = 'http://www.elargentino.com/medios/125/7-Dias.html'
|
||||||
extra_css = ' .titulo{font-size: x-large; font-weight: bold} .volantaImp{font-size: small; font-weight: bold} '
|
extra_css = ' .titulo{font-size: x-large; font-weight: bold} .volantaImp{font-size: small; font-weight: bold} '
|
||||||
|
|
||||||
html2lrf_options = [
|
keep_only_tags = [dict(name='div', attrs={'class': 'ContainerPop'})]
|
||||||
'--comment' , description
|
|
||||||
, '--category' , category
|
|
||||||
, '--publisher', publisher
|
|
||||||
]
|
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
|
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':'ContainerPop'})]
|
|
||||||
|
|
||||||
remove_tags = [dict(name='link')]
|
remove_tags = [dict(name='link')]
|
||||||
|
|
||||||
@ -50,20 +43,23 @@ class SieteDias(BasicNewsRecipe):
|
|||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
soup.html['lang'] = self.lang
|
soup.html['lang'] = self.lang
|
||||||
soup.html['dir' ] = self.direction
|
soup.html['dir'] = self.direction
|
||||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
mlang = Tag(soup, 'meta', [
|
||||||
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
|
("http-equiv", "Content-Language"), ("content", self.lang)])
|
||||||
soup.head.insert(0,mlang)
|
mcharset = Tag(soup, 'meta', [
|
||||||
soup.head.insert(1,mcharset)
|
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
|
||||||
|
soup.head.insert(0, mlang)
|
||||||
|
soup.head.insert(1, mcharset)
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
cover_url = None
|
cover_url = None
|
||||||
soup = self.index_to_soup(self.INDEX)
|
soup = self.index_to_soup(self.INDEX)
|
||||||
cover_item = soup.find('div',attrs={'class':'colder'})
|
cover_item = soup.find('div', attrs={'class': 'colder'})
|
||||||
if cover_item:
|
if cover_item:
|
||||||
clean_url = self.image_url_processor(None,cover_item.div.img['src'])
|
clean_url = self.image_url_processor(
|
||||||
cover_url = 'http://www.elargentino.com' + clean_url + '&height=600'
|
None, cover_item.div.img['src'])
|
||||||
|
cover_url = 'http://www.elargentino.com' + clean_url + '&height=600'
|
||||||
return cover_url
|
return cover_url
|
||||||
|
|
||||||
def image_url_processor(self, baseurl, url):
|
def image_url_processor(self, baseurl, url):
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||||
'''
|
'''
|
||||||
sapteseri.ro
|
sapteseri.ro
|
||||||
@ -9,43 +9,40 @@ sapteseri.ro
|
|||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class SapteSeri(BasicNewsRecipe):
|
class SapteSeri(BasicNewsRecipe):
|
||||||
title = u'Sapte Seri'
|
title = u'Sapte Seri'
|
||||||
__author__ = u'Silviu Cotoar\u0103'
|
__author__ = u'Silviu Cotoar\u0103'
|
||||||
description = u'Sapte Seri'
|
description = u'Sapte Seri'
|
||||||
publisher = u'Sapte Seri'
|
publisher = u'Sapte Seri'
|
||||||
oldest_article = 5
|
oldest_article = 5
|
||||||
language = 'ro'
|
language = 'ro'
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
category = 'Ziare,Oras,Distractie,Fun'
|
category = 'Ziare,Oras,Distractie,Fun'
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
cover_url = 'http://www.sapteseri.ro/Images/logo.jpg'
|
cover_url = 'http://www.sapteseri.ro/Images/logo.jpg'
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comments' : description
|
'comments': description, 'tags': category, 'language': language, 'publisher': publisher
|
||||||
,'tags' : category
|
}
|
||||||
,'language' : language
|
|
||||||
,'publisher' : publisher
|
|
||||||
}
|
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='h1', attrs={'id':'title'})
|
dict(name='h1', attrs={'id': 'title'}), dict(name='div', attrs={'class': 'mt10 mb10'}), dict(
|
||||||
, dict(name='div', attrs={'class':'mt10 mb10'})
|
name='div', attrs={'class': 'mb20 mt10'}), dict(name='div', attrs={'class': 'mt5 mb20'})
|
||||||
, dict(name='div', attrs={'class':'mb20 mt10'})
|
]
|
||||||
, dict(name='div', attrs={'class':'mt5 mb20'})
|
|
||||||
]
|
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'id':['entityimgworking']})
|
dict(name='div', attrs={'id': ['entityimgworking']})
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Ce se intampla azi in Bucuresti', u'http://www.sapteseri.ro/ro/feed/ce-se-intampla-azi/bucuresti/')
|
(u'Ce se intampla azi in Bucuresti',
|
||||||
]
|
u'http://www.sapteseri.ro/ro/feed/ce-se-intampla-azi/bucuresti/')
|
||||||
|
]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
return self.adeify_images(soup)
|
return self.adeify_images(soup)
|
||||||
|
@ -1,69 +1,70 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__author__ = 'Gabriele Marini, based on Darko Miletic'
|
__author__ = 'Gabriele Marini, based on Darko Miletic'
|
||||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
description = 'Italian daily newspaper - 01-05-2010'
|
description = 'Italian daily newspaper - 01-05-2010'
|
||||||
'''
|
'''
|
||||||
http://www.ansa.it/
|
http://www.ansa.it/
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class Ansa(BasicNewsRecipe):
|
class Ansa(BasicNewsRecipe):
|
||||||
__author__ = 'Gabriele Marini'
|
__author__ = 'Gabriele Marini'
|
||||||
description = 'Italian News Agency'
|
description = 'Italian News Agency'
|
||||||
|
|
||||||
cover_url = 'http://www.ansa.it/web/images/logo_ansa_interna.gif'
|
cover_url = 'http://www.ansa.it/web/images/logo_ansa_interna.gif'
|
||||||
title = u'Ansa'
|
title = u'Ansa'
|
||||||
publisher = 'Ansa'
|
publisher = 'Ansa'
|
||||||
category = 'News, politics, culture, economy, general interest'
|
category = 'News, politics, culture, economy, general interest'
|
||||||
|
|
||||||
language = 'it'
|
language = 'it'
|
||||||
timefmt = '[%a, %d %b, %Y]'
|
timefmt = '[%a, %d %b, %Y]'
|
||||||
|
|
||||||
oldest_article = 1
|
oldest_article = 1
|
||||||
max_articles_per_feed = 10
|
max_articles_per_feed = 10
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
recursion = 10
|
recursion = 10
|
||||||
|
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
conversion_options = {'linearize_tables':True}
|
conversion_options = {'linearize_tables': True}
|
||||||
remove_attributes = ['colspan']
|
remove_attributes = ['colspan']
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':['path','header-content','corpo']}),
|
keep_only_tags = [dict(name='div', attrs={'class': ['path', 'header-content', 'corpo']}),
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
remove_tags = [
|
|
||||||
dict(name='div', attrs={'class':'tools-bar'}),
|
|
||||||
dict(name='div', attrs={'id':['rssdiv','blocco']})
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
(u'HomePage', u'http://www.ansa.it/web/ansait_web_rss_homepage.xml'),
|
|
||||||
(u'Top New', u'http://www.ansa.it/web/notizie/rubriche/topnews/topnews_rss.xml'),
|
|
||||||
(u'Cronaca', u'http://www.ansa.it/web/notizie/rubriche/cronaca/cronaca_rss.xml'),
|
|
||||||
(u'Mondo', u'http://www.ansa.it/web/notizie/rubriche/mondo/mondo_rss.xml'),
|
|
||||||
(u'Economia', u'http://www.ansa.it/web/notizie/rubriche/economia/economia_rss.xml'),
|
|
||||||
(u'Politica', u'http://www.ansa.it/web/notizie/rubriche/politica/politica_rss.xml'),
|
|
||||||
(u'Scienze', u'http://www.ansa.it/web/notizie/rubriche/scienza/scienza_rss.xml'),
|
|
||||||
(u'Cinema', u'http://www.ansa.it/web/notizie/rubriche/cinema/cinema_rss.xml'),
|
|
||||||
(u'Tecnologia e Internet', u'http://www.ansa.it/web/notizie/rubriche/tecnologia/tecnologia_rss.xml'),
|
|
||||||
(u'Spettacolo', u'http://www.ansa.it/web/notizie/rubriche/spettacolo/spettacolo_rss.xml'),
|
|
||||||
(u'Cultura e Tendenze', u'http://www.ansa.it/web/notizie/rubriche/cultura/cultura_rss.xml'),
|
|
||||||
(u'Sport', u'http://www.ansa.it/web/notizie/rubriche/altrisport/altrisport_rss.xml'),
|
|
||||||
(u'Calcio', u'http://www.ansa.it/web/notizie/rubriche/calcio/calcio_rss.xml'),
|
|
||||||
(u'Lazio', u'http://www.ansa.it/web/notizie/regioni/lazio/lazio_rss.xml'),
|
|
||||||
(u'Lombardia', u'http://www.ansa.it/web/notizie/regioni/lombardia/lombardia.shtml'),
|
|
||||||
(u'Veneto', u'http://www.ansa.it/web/notizie/regioni/veneto/veneto.shtml'),
|
|
||||||
(u'Campanioa', u'http://www.ansa.it/web/notizie/regioni/campania/campania.shtml'),
|
|
||||||
(u'Sicilia', u'http://www.ansa.it/web/notizie/regioni/sicilia/sicilia.shtml'),
|
|
||||||
(u'Toscana', u'http://www.ansa.it/web/notizie/regioni/toscana/toscana.shtml'),
|
|
||||||
(u'Trentino', u'http://www.ansa.it/web/notizie/regioni/trentino/trentino.shtml')
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
remove_tags = [
|
||||||
|
dict(name='div', attrs={'class': 'tools-bar'}),
|
||||||
|
dict(name='div', attrs={'id': ['rssdiv', 'blocco']})
|
||||||
|
]
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
(u'HomePage', u'http://www.ansa.it/web/ansait_web_rss_homepage.xml'),
|
||||||
|
(u'Top New', u'http://www.ansa.it/web/notizie/rubriche/topnews/topnews_rss.xml'),
|
||||||
|
(u'Cronaca', u'http://www.ansa.it/web/notizie/rubriche/cronaca/cronaca_rss.xml'),
|
||||||
|
(u'Mondo', u'http://www.ansa.it/web/notizie/rubriche/mondo/mondo_rss.xml'),
|
||||||
|
(u'Economia', u'http://www.ansa.it/web/notizie/rubriche/economia/economia_rss.xml'),
|
||||||
|
(u'Politica', u'http://www.ansa.it/web/notizie/rubriche/politica/politica_rss.xml'),
|
||||||
|
(u'Scienze', u'http://www.ansa.it/web/notizie/rubriche/scienza/scienza_rss.xml'),
|
||||||
|
(u'Cinema', u'http://www.ansa.it/web/notizie/rubriche/cinema/cinema_rss.xml'),
|
||||||
|
(u'Tecnologia e Internet',
|
||||||
|
u'http://www.ansa.it/web/notizie/rubriche/tecnologia/tecnologia_rss.xml'),
|
||||||
|
(u'Spettacolo', u'http://www.ansa.it/web/notizie/rubriche/spettacolo/spettacolo_rss.xml'),
|
||||||
|
(u'Cultura e Tendenze',
|
||||||
|
u'http://www.ansa.it/web/notizie/rubriche/cultura/cultura_rss.xml'),
|
||||||
|
(u'Sport', u'http://www.ansa.it/web/notizie/rubriche/altrisport/altrisport_rss.xml'),
|
||||||
|
(u'Calcio', u'http://www.ansa.it/web/notizie/rubriche/calcio/calcio_rss.xml'),
|
||||||
|
(u'Lazio', u'http://www.ansa.it/web/notizie/regioni/lazio/lazio_rss.xml'),
|
||||||
|
(u'Lombardia', u'http://www.ansa.it/web/notizie/regioni/lombardia/lombardia.shtml'),
|
||||||
|
(u'Veneto', u'http://www.ansa.it/web/notizie/regioni/veneto/veneto.shtml'),
|
||||||
|
(u'Campanioa', u'http://www.ansa.it/web/notizie/regioni/campania/campania.shtml'),
|
||||||
|
(u'Sicilia', u'http://www.ansa.it/web/notizie/regioni/sicilia/sicilia.shtml'),
|
||||||
|
(u'Toscana', u'http://www.ansa.it/web/notizie/regioni/toscana/toscana.shtml'),
|
||||||
|
(u'Trentino', u'http://www.ansa.it/web/notizie/regioni/trentino/trentino.shtml')
|
||||||
|
]
|
||||||
|
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
.path{font-style: italic; font-size: small}
|
.path{font-style: italic; font-size: small}
|
||||||
.header-content h1{font-weight: bold; font-size: xx-large}
|
.header-content h1{font-weight: bold; font-size: xx-large}
|
||||||
|
@ -1,21 +1,22 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
|
||||||
class DrawAndCook(BasicNewsRecipe):
|
class DrawAndCook(BasicNewsRecipe):
|
||||||
title = 'DrawAndCook'
|
title = 'DrawAndCook'
|
||||||
__author__ = 'Starson17'
|
__author__ = 'Starson17'
|
||||||
__version__ = 'v1.10'
|
__version__ = 'v1.10'
|
||||||
__date__ = '13 March 2011'
|
__date__ = '13 March 2011'
|
||||||
description = 'Drawings of recipes!'
|
description = 'Drawings of recipes!'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
publisher = 'Starson17'
|
publisher = 'Starson17'
|
||||||
category = 'news, food, recipes'
|
category = 'news, food, recipes'
|
||||||
use_embedded_content= False
|
use_embedded_content = False
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
oldest_article = 24
|
oldest_article = 24
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
cover_url = 'http://farm5.static.flickr.com/4043/4471139063_4dafced67f_o.jpg'
|
cover_url = 'http://farm5.static.flickr.com/4043/4471139063_4dafced67f_o.jpg'
|
||||||
INDEX = 'http://www.theydrawandcook.com'
|
INDEX = 'http://www.theydrawandcook.com'
|
||||||
max_articles_per_feed = 30
|
max_articles_per_feed = 30
|
||||||
|
|
||||||
@ -24,8 +25,8 @@ class DrawAndCook(BasicNewsRecipe):
|
|||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
feeds = []
|
feeds = []
|
||||||
for title, url in [
|
for title, url in [
|
||||||
("They Draw and Cook", "http://www.theydrawandcook.com/")
|
("They Draw and Cook", "http://www.theydrawandcook.com/")
|
||||||
]:
|
]:
|
||||||
articles = self.make_links(url)
|
articles = self.make_links(url)
|
||||||
if articles:
|
if articles:
|
||||||
feeds.append((title, articles))
|
feeds.append((title, articles))
|
||||||
@ -38,22 +39,24 @@ class DrawAndCook(BasicNewsRecipe):
|
|||||||
date = ''
|
date = ''
|
||||||
current_articles = []
|
current_articles = []
|
||||||
soup = self.index_to_soup(url)
|
soup = self.index_to_soup(url)
|
||||||
featured_major_slider = soup.find(name='div', attrs={'id':'featured_major_slider'})
|
featured_major_slider = soup.find(
|
||||||
recipes = featured_major_slider.findAll('li', attrs={'data-id': re.compile(r'artwork_entry_\d+', re.DOTALL)})
|
name='div', attrs={'id': 'featured_major_slider'})
|
||||||
|
recipes = featured_major_slider.findAll(
|
||||||
|
'li', attrs={'data-id': re.compile(r'artwork_entry_\d+', re.DOTALL)})
|
||||||
for recipe in recipes:
|
for recipe in recipes:
|
||||||
page_url = self.INDEX + recipe.a['href']
|
page_url = self.INDEX + recipe.a['href']
|
||||||
print 'page_url is: ', page_url
|
print 'page_url is: ', page_url
|
||||||
title = recipe.find('strong').string
|
title = recipe.find('strong').string
|
||||||
print 'title is: ', title
|
print 'title is: ', title
|
||||||
current_articles.append({'title': title, 'url': page_url, 'description':'', 'date':date})
|
current_articles.append(
|
||||||
|
{'title': title, 'url': page_url, 'description': '', 'date': date})
|
||||||
return current_articles
|
return current_articles
|
||||||
|
|
||||||
keep_only_tags = [dict(name='h1', attrs={'id':'page_title'})
|
keep_only_tags = [dict(name='h1', attrs={'id': 'page_title'}), dict(name='section', attrs={'id': 'artwork'})
|
||||||
,dict(name='section', attrs={'id':'artwork'})
|
]
|
||||||
]
|
|
||||||
|
|
||||||
remove_tags = [dict(name='article', attrs={'id':['recipe_actions', 'metadata']})
|
remove_tags = [dict(name='article', attrs={'id': ['recipe_actions', 'metadata']})
|
||||||
]
|
]
|
||||||
|
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
|
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
|
||||||
@ -61,5 +64,4 @@ class DrawAndCook(BasicNewsRecipe):
|
|||||||
img {max-width:100%; min-width:100%;}
|
img {max-width:100%; min-width:100%;}
|
||||||
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
||||||
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
|
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
@ -2,9 +2,8 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class ZiveRecipe(BasicNewsRecipe):
|
class ZiveRecipe(BasicNewsRecipe):
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__author__ = 'Abelturd'
|
__author__ = 'Abelturd'
|
||||||
language = 'sk'
|
language = 'sk'
|
||||||
version = 1
|
version = 1
|
||||||
@ -25,21 +24,20 @@ class ZiveRecipe(BasicNewsRecipe):
|
|||||||
cover_url = 'http://www.zive.sk/Client.Images/Logos/logo-zive-sk.gif'
|
cover_url = 'http://www.zive.sk/Client.Images/Logos/logo-zive-sk.gif'
|
||||||
|
|
||||||
feeds = []
|
feeds = []
|
||||||
feeds.append((u'V\u0161etky \u010dl\xe1nky', u'http://www.zive.sk/rss/sc-47/default.aspx'))
|
feeds.append((u'V\u0161etky \u010dl\xe1nky',
|
||||||
|
u'http://www.zive.sk/rss/sc-47/default.aspx'))
|
||||||
|
|
||||||
preprocess_regexps = [
|
preprocess_regexps = [
|
||||||
(re.compile(r'<p><p><strong>Pokra.*ie</strong></p>', re.DOTALL|re.IGNORECASE),
|
(re.compile(r'<p><p><strong>Pokra.*ie</strong></p>', re.DOTALL | re.IGNORECASE),
|
||||||
lambda match: ''),
|
lambda match: ''),
|
||||||
|
|
||||||
]
|
|
||||||
|
|
||||||
|
]
|
||||||
|
|
||||||
remove_tags = []
|
remove_tags = []
|
||||||
|
|
||||||
keep_only_tags = [dict(name='h1'), dict(name='span', attrs={'class':'arlist-data-info-author'}), dict(name='div', attrs={'class':'bbtext font-resizer-area'}),]
|
keep_only_tags = [dict(name='h1'), dict(name='span', attrs={
|
||||||
|
'class': 'arlist-data-info-author'}), dict(name='div', attrs={'class': 'bbtext font-resizer-area'}), ]
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
h1 {font-size:140%;font-family:georgia,serif; font-weight:bold}
|
h1 {font-size:140%;font-family:georgia,serif; font-weight:bold}
|
||||||
h3 {font-size:115%;font-family:georgia,serif; font-weight:bold}
|
h3 {font-size:115%;font-family:georgia,serif; font-weight:bold}
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,71 +1,113 @@
|
|||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe(BasicNewsRecipe):
|
class AdvancedUserRecipe(BasicNewsRecipe):
|
||||||
|
|
||||||
title = u'Aachener Nachrichten'
|
title = u'Aachener Nachrichten'
|
||||||
__author__ = 'schuster' #AGE update 2012-11-28
|
__author__ = 'schuster' # AGE update 2012-11-28
|
||||||
oldest_article = 1
|
oldest_article = 1
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
language = 'de'
|
language = 'de'
|
||||||
|
|
||||||
# cover_url = 'http://www.aachener-nachrichten.de/img/logos/an_website_retina.png'
|
# cover_url = 'http://www.aachener-nachrichten.de/img/logos/an_website_retina.png'
|
||||||
masthead_url = 'http://www.aachener-nachrichten.de/img/logos/an_website_retina.png'
|
masthead_url = 'http://www.aachener-nachrichten.de/img/logos/an_website_retina.png'
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='article', attrs={'class':['single']})
|
dict(name='article', attrs={'class': ['single']})
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'class':["clearfix navi-wrapper"]}),
|
dict(name='div', attrs={'class': ["clearfix navi-wrapper"]}),
|
||||||
dict(name='div', attrs={'id':["article_actions"]}),
|
dict(name='div', attrs={'id': ["article_actions"]}),
|
||||||
dict(name='style', attrs={'type':["text/css"]}),
|
dict(name='style', attrs={'type': ["text/css"]}),
|
||||||
dict(name='aside'),
|
dict(name='aside'),
|
||||||
dict(name='a', attrs={'class':["btn btn-action"]})
|
dict(name='a', attrs={'class': ["btn btn-action"]})
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Lokales - Euregio', u'http://www.aachener-nachrichten.de/cmlink/euregio-rss-1.357285'),
|
(u'Lokales - Euregio',
|
||||||
(u'Lokales - Aachen', u'http://www.aachener-nachrichten.de/cmlink/aachen-rss-1.357286'),
|
u'http://www.aachener-nachrichten.de/cmlink/euregio-rss-1.357285'),
|
||||||
(u'Lokales - Nordkreis', u'http://www.aachener-nachrichten.de/cmlink/nordkreis-rss-1.358150'),
|
(u'Lokales - Aachen',
|
||||||
(u'Lokales - Düren', u'http://www.aachener-nachrichten.de/cmlink/dueren-rss-1.358626'),
|
u'http://www.aachener-nachrichten.de/cmlink/aachen-rss-1.357286'),
|
||||||
(u'Lokales - Eiffel', u'http://www.aachener-nachrichten.de/cmlink/eifel-rss-1.358978'),
|
(u'Lokales - Nordkreis',
|
||||||
(u'Lokales - Eschweiler', u'http://www.aachener-nachrichten.de/cmlink/eschweiler-rss-1.359332'),
|
u'http://www.aachener-nachrichten.de/cmlink/nordkreis-rss-1.358150'),
|
||||||
(u'Lokales - Geilenkirchen', u'http://www.aachener-nachrichten.de/cmlink/geilenkirchen-rss-1.359643'),
|
(u'Lokales - Düren',
|
||||||
(u'Lokales - Heinsberg', u'http://www.aachener-nachrichten.de/cmlink/heinsberg-rss-1.359724'),
|
u'http://www.aachener-nachrichten.de/cmlink/dueren-rss-1.358626'),
|
||||||
(u'Lokales - Jülich', u'http://www.aachener-nachrichten.de/cmlink/juelich-rss-1.359725'),
|
(u'Lokales - Eiffel',
|
||||||
(u'Lokales - Stolberg', u'http://www.aachener-nachrichten.de/cmlink/stolberg-rss-1.359726'),
|
u'http://www.aachener-nachrichten.de/cmlink/eifel-rss-1.358978'),
|
||||||
(u'News - Politik', u'http://www.aachener-nachrichten.de/cmlink/politik-rss-1.359727'),
|
(u'Lokales - Eschweiler',
|
||||||
(u'News - Aus aller Welt', u'http://www.aachener-nachrichten.de/cmlink/ausallerwelt-rss-1.453282'),
|
u'http://www.aachener-nachrichten.de/cmlink/eschweiler-rss-1.359332'),
|
||||||
(u'News - Wirtschaft', u'http://www.aachener-nachrichten.de/cmlink/wirtschaft-rss-1.359872'),
|
(u'Lokales - Geilenkirchen',
|
||||||
(u'News - Kultur', u'http://www.aachener-nachrichten.de/cmlink/kultur-rss-1.365018'),
|
u'http://www.aachener-nachrichten.de/cmlink/geilenkirchen-rss-1.359643'),
|
||||||
(u'News - Kino', u'http://www.aachener-nachrichten.de/cmlink/kino-rss-1.365019'),
|
(u'Lokales - Heinsberg',
|
||||||
(u'News - Digital', u'http://www.aachener-nachrichten.de/cmlink/digital-rss-1.365020'),
|
u'http://www.aachener-nachrichten.de/cmlink/heinsberg-rss-1.359724'),
|
||||||
(u'News - Wissenschaft', u'http://www.aachener-nachrichten.de/cmlink/wissenschaft-rss-1.365021'),
|
(u'Lokales - Jülich',
|
||||||
(u'News - Hochschule', u'http://www.aachener-nachrichten.de/cmlink/hochschule-rss-1.365022'),
|
u'http://www.aachener-nachrichten.de/cmlink/juelich-rss-1.359725'),
|
||||||
(u'News - Auto', u'http://www.aachener-nachrichten.de/cmlink/auto-rss-1.365023'),
|
(u'Lokales - Stolberg',
|
||||||
(u'News - Kurioses', u'http://www.aachener-nachrichten.de/cmlink/kurioses-rss-1.365067'),
|
u'http://www.aachener-nachrichten.de/cmlink/stolberg-rss-1.359726'),
|
||||||
(u'News - Musik', u'http://www.aachener-nachrichten.de/cmlink/musik-rss-1.365305'),
|
(u'News - Politik',
|
||||||
(u'News - Tagesthema', u'http://www.aachener-nachrichten.de/cmlink/tagesthema-rss-1.365519'),
|
u'http://www.aachener-nachrichten.de/cmlink/politik-rss-1.359727'),
|
||||||
(u'News - Newsticker', u'http://www.aachener-nachrichten.de/cmlink/newsticker-rss-1.451948'),
|
(u'News - Aus aller Welt',
|
||||||
(u'Sport - Aktuell', u'http://www.aachener-nachrichten.de/cmlink/aktuell-rss-1.366716'),
|
u'http://www.aachener-nachrichten.de/cmlink/ausallerwelt-rss-1.453282'),
|
||||||
(u'Sport - Fußball', u'http://www.aachener-nachrichten.de/cmlink/fussball-rss-1.367060'),
|
(u'News - Wirtschaft',
|
||||||
(u'Sport - Bundesliga', u'http://www.aachener-nachrichten.de/cmlink/bundesliga-rss-1.453367'),
|
u'http://www.aachener-nachrichten.de/cmlink/wirtschaft-rss-1.359872'),
|
||||||
(u'Sport - Alemannia Aachen', u'http://www.aachener-nachrichten.de/cmlink/alemanniaaachen-rss-1.366057'),
|
(u'News - Kultur',
|
||||||
(u'Sport - Volleyball', u'http://www.aachener-nachrichten.de/cmlink/volleyball-rss-1.453370'),
|
u'http://www.aachener-nachrichten.de/cmlink/kultur-rss-1.365018'),
|
||||||
(u'Sport - Chio', u'http://www.aachener-nachrichten.de/cmlink/chio-rss-1.453371'),
|
(u'News - Kino', u'http://www.aachener-nachrichten.de/cmlink/kino-rss-1.365019'),
|
||||||
(u'Dossier - Kinderuni', u'http://www.aachener-nachrichten.de/cmlink/kinderuni-rss-1.453375'),
|
(u'News - Digital',
|
||||||
(u'Dossier - Karlspreis', u'http://www.aachener-nachrichten.de/cmlink/karlspreis-rss-1.453376'),
|
u'http://www.aachener-nachrichten.de/cmlink/digital-rss-1.365020'),
|
||||||
(u'Dossier - Ritterorden', u'http://www.aachener-nachrichten.de/cmlink/ritterorden-rss-1.453377'),
|
(u'News - Wissenschaft',
|
||||||
(u'Dossier - ZAB-Aachen', u'http://www.aachener-nachrichten.de/cmlink/zabaachen-rss-1.453380'),
|
u'http://www.aachener-nachrichten.de/cmlink/wissenschaft-rss-1.365021'),
|
||||||
(u'Dossier - Karneval', u'http://www.aachener-nachrichten.de/cmlink/karneval-rss-1.453384'),
|
(u'News - Hochschule',
|
||||||
(u'Ratgeber - Geld', u'http://www.aachener-nachrichten.de/cmlink/geld-rss-1.453385'),
|
u'http://www.aachener-nachrichten.de/cmlink/hochschule-rss-1.365022'),
|
||||||
(u'Ratgeber - Recht', u'http://www.aachener-nachrichten.de/cmlink/recht-rss-1.453386'),
|
(u'News - Auto', u'http://www.aachener-nachrichten.de/cmlink/auto-rss-1.365023'),
|
||||||
(u'Ratgeber - Gesundheit', u'http://www.aachener-nachrichten.de/cmlink/gesundheit-rss-1.453387'),
|
(u'News - Kurioses',
|
||||||
(u'Ratgeber - Familie', u'http://www.aachener-nachrichten.de/cmlink/familie-rss-1.453388'),
|
u'http://www.aachener-nachrichten.de/cmlink/kurioses-rss-1.365067'),
|
||||||
(u'Ratgeber - Livestyle', u'http://www.aachener-nachrichten.de/cmlink/lifestyle-rss-1.453389'),
|
(u'News - Musik',
|
||||||
(u'Ratgeber - Reisen', u'http://www.aachener-nachrichten.de/cmlink/reisen-rss-1.453390'),
|
u'http://www.aachener-nachrichten.de/cmlink/musik-rss-1.365305'),
|
||||||
(u'Ratgeber - Bauen und Wohnen', u'http://www.aachener-nachrichten.de/cmlink/bauen-rss-1.453398'),
|
(u'News - Tagesthema',
|
||||||
(u'Ratgeber - Bildung und Beruf', u'http://www.aachener-nachrichten.de/cmlink/bildung-rss-1.453400'),
|
u'http://www.aachener-nachrichten.de/cmlink/tagesthema-rss-1.365519'),
|
||||||
]
|
(u'News - Newsticker',
|
||||||
|
u'http://www.aachener-nachrichten.de/cmlink/newsticker-rss-1.451948'),
|
||||||
|
(u'Sport - Aktuell',
|
||||||
|
u'http://www.aachener-nachrichten.de/cmlink/aktuell-rss-1.366716'),
|
||||||
|
(u'Sport - Fußball',
|
||||||
|
u'http://www.aachener-nachrichten.de/cmlink/fussball-rss-1.367060'),
|
||||||
|
(u'Sport - Bundesliga',
|
||||||
|
u'http://www.aachener-nachrichten.de/cmlink/bundesliga-rss-1.453367'),
|
||||||
|
(u'Sport - Alemannia Aachen',
|
||||||
|
u'http://www.aachener-nachrichten.de/cmlink/alemanniaaachen-rss-1.366057'),
|
||||||
|
(u'Sport - Volleyball',
|
||||||
|
u'http://www.aachener-nachrichten.de/cmlink/volleyball-rss-1.453370'),
|
||||||
|
(u'Sport - Chio',
|
||||||
|
u'http://www.aachener-nachrichten.de/cmlink/chio-rss-1.453371'),
|
||||||
|
(u'Dossier - Kinderuni',
|
||||||
|
u'http://www.aachener-nachrichten.de/cmlink/kinderuni-rss-1.453375'),
|
||||||
|
(u'Dossier - Karlspreis',
|
||||||
|
u'http://www.aachener-nachrichten.de/cmlink/karlspreis-rss-1.453376'),
|
||||||
|
(u'Dossier - Ritterorden',
|
||||||
|
u'http://www.aachener-nachrichten.de/cmlink/ritterorden-rss-1.453377'),
|
||||||
|
(u'Dossier - ZAB-Aachen',
|
||||||
|
u'http://www.aachener-nachrichten.de/cmlink/zabaachen-rss-1.453380'),
|
||||||
|
(u'Dossier - Karneval',
|
||||||
|
u'http://www.aachener-nachrichten.de/cmlink/karneval-rss-1.453384'),
|
||||||
|
(u'Ratgeber - Geld',
|
||||||
|
u'http://www.aachener-nachrichten.de/cmlink/geld-rss-1.453385'),
|
||||||
|
(u'Ratgeber - Recht',
|
||||||
|
u'http://www.aachener-nachrichten.de/cmlink/recht-rss-1.453386'),
|
||||||
|
(u'Ratgeber - Gesundheit',
|
||||||
|
u'http://www.aachener-nachrichten.de/cmlink/gesundheit-rss-1.453387'),
|
||||||
|
(u'Ratgeber - Familie',
|
||||||
|
u'http://www.aachener-nachrichten.de/cmlink/familie-rss-1.453388'),
|
||||||
|
(u'Ratgeber - Livestyle',
|
||||||
|
u'http://www.aachener-nachrichten.de/cmlink/lifestyle-rss-1.453389'),
|
||||||
|
(u'Ratgeber - Reisen',
|
||||||
|
u'http://www.aachener-nachrichten.de/cmlink/reisen-rss-1.453390'),
|
||||||
|
(u'Ratgeber - Bauen und Wohnen',
|
||||||
|
u'http://www.aachener-nachrichten.de/cmlink/bauen-rss-1.453398'),
|
||||||
|
(u'Ratgeber - Bildung und Beruf',
|
||||||
|
u'http://www.aachener-nachrichten.de/cmlink/bildung-rss-1.453400'),
|
||||||
|
]
|
||||||
|
@ -1,43 +1,45 @@
|
|||||||
import re
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class ABCRecipe(BasicNewsRecipe):
|
class ABCRecipe(BasicNewsRecipe):
|
||||||
title = u'ABC Linuxu'
|
title = u'ABC Linuxu'
|
||||||
oldest_article = 5
|
oldest_article = 5
|
||||||
max_articles_per_feed = 3#5
|
max_articles_per_feed = 3 # 5
|
||||||
__author__ = 'Funthomas'
|
__author__ = 'Funthomas'
|
||||||
language = 'cs'
|
language = 'cs'
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
#(u'Blogy', u'http://www.abclinuxu.cz/auto/blogDigest.rss'),
|
# (u'Blogy', u'http://www.abclinuxu.cz/auto/blogDigest.rss'),
|
||||||
(u'Články', u'http://www.abclinuxu.cz/auto/abc.rss'),
|
(u'Články', u'http://www.abclinuxu.cz/auto/abc.rss'),
|
||||||
(u'Zprávičky','http://www.abclinuxu.cz/auto/zpravicky.rss')
|
(u'Zprávičky', 'http://www.abclinuxu.cz/auto/zpravicky.rss')
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_attributes = ['width','height']
|
remove_attributes = ['width', 'height']
|
||||||
|
|
||||||
remove_tags_before = dict(name='h1')
|
remove_tags_before = dict(name='h1')
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(attrs={'class':['meta-vypis','page_tools','cl_perex']}),
|
dict(attrs={'class': ['meta-vypis', 'page_tools', 'cl_perex']}),
|
||||||
dict(attrs={'class':['cl_nadpis-link','komix-nav']})
|
dict(attrs={'class': ['cl_nadpis-link', 'komix-nav']})
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags_after = [
|
remove_tags_after = [
|
||||||
dict(name='div',attrs={'class':['cl_perex','komix-nav']}),
|
dict(name='div', attrs={'class': ['cl_perex', 'komix-nav']}),
|
||||||
dict(attrs={'class':['meta-vypis','page_tools']}),
|
dict(attrs={'class': ['meta-vypis', 'page_tools']}),
|
||||||
dict(name='',attrs={'':''}),
|
dict(name='', attrs={'': ''}),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
preprocess_regexps = [
|
||||||
|
(re.compile(r'</div>.*<p class="perex">', re.DOTALL),
|
||||||
|
lambda match: '</div><p class="perex">')
|
||||||
|
]
|
||||||
|
|
||||||
preprocess_regexps = [
|
def print_version(self, url):
|
||||||
(re.compile(r'</div>.*<p class="perex">', re.DOTALL),lambda match: '</div><p class="perex">')
|
return url + '?varianta=print&noDiz'
|
||||||
]
|
|
||||||
def print_version(self, url):
|
|
||||||
return url + '?varianta=print&noDiz'
|
|
||||||
|
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
h1 {font-size:130%; font-weight:bold}
|
h1 {font-size:130%; font-weight:bold}
|
||||||
h3 {font-size:111%; font-weight:bold}
|
h3 {font-size:111%; font-weight:bold}
|
||||||
'''
|
'''
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2011, Pat Stapleton <pat.stapleton at gmail.com>'
|
__copyright__ = '2011, Pat Stapleton <pat.stapleton at gmail.com>'
|
||||||
'''
|
'''
|
||||||
abc.net.au/news
|
abc.net.au/news
|
||||||
@ -6,51 +6,50 @@ abc.net.au/news
|
|||||||
import re
|
import re
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class ABCNews(BasicNewsRecipe):
|
class ABCNews(BasicNewsRecipe):
|
||||||
title = 'ABC News'
|
title = 'ABC News'
|
||||||
__author__ = 'Pat Stapleton, Dean Cording'
|
__author__ = 'Pat Stapleton, Dean Cording'
|
||||||
description = 'News from Australia'
|
description = 'News from Australia'
|
||||||
masthead_url = 'http://www.abc.net.au/news/assets/v5/images/common/logo-news.png'
|
masthead_url = 'http://www.abc.net.au/news/assets/v5/images/common/logo-news.png'
|
||||||
cover_url = 'http://www.abc.net.au/news/assets/v5/images/common/logo-news.png'
|
cover_url = 'http://www.abc.net.au/news/assets/v5/images/common/logo-news.png'
|
||||||
|
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = False
|
no_stylesheets = False
|
||||||
#delay = 1
|
use_embedded_content = False
|
||||||
use_embedded_content = False
|
encoding = 'utf8'
|
||||||
encoding = 'utf8'
|
publisher = 'ABC News'
|
||||||
publisher = 'ABC News'
|
category = 'News, Australia, World'
|
||||||
category = 'News, Australia, World'
|
language = 'en_AU'
|
||||||
language = 'en_AU'
|
publication_type = 'newsportal'
|
||||||
publication_type = 'newsportal'
|
|
||||||
# preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
|
# preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
|
||||||
#Remove annoying map links (inline-caption class is also used for some image captions! hence regex to match maps.google)
|
# Remove annoying map links (inline-caption class is also used for some
|
||||||
preprocess_regexps = [(re.compile(r'<a class="inline-caption" href="http://maps\.google\.com.*?/a>', re.DOTALL), lambda m: '')]
|
# image captions! hence regex to match maps.google)
|
||||||
|
preprocess_regexps = [(re.compile(
|
||||||
|
r'<a class="inline-caption" href="http://maps\.google\.com.*?/a>', re.DOTALL), lambda m: '')]
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comments' : description
|
'comments': description, 'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': False
|
||||||
,'tags' : category
|
}
|
||||||
,'language' : language
|
|
||||||
,'publisher' : publisher
|
|
||||||
,'linearize_tables': False
|
|
||||||
}
|
|
||||||
|
|
||||||
keep_only_tags = [dict(attrs={'class':['article section']})]
|
keep_only_tags = [dict(attrs={'class': ['article section']})]
|
||||||
|
|
||||||
remove_tags = [dict(attrs={'class':['related', 'tags', 'tools', 'attached-content ready',
|
remove_tags = [dict(attrs={'class': ['related', 'tags', 'tools', 'attached-content ready',
|
||||||
'inline-content story left', 'inline-content map left contracted', 'published',
|
'inline-content story left', 'inline-content map left contracted', 'published',
|
||||||
'story-map', 'statepromo', 'topics', ]})]
|
'story-map', 'statepromo', 'topics', ]})]
|
||||||
|
|
||||||
remove_attributes = ['width','height']
|
remove_attributes = ['width', 'height']
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
('Top Stories', 'http://www.abc.net.au/news/feed/45910/rss.xml'),
|
('Top Stories', 'http://www.abc.net.au/news/feed/45910/rss.xml'),
|
||||||
('Canberra', 'http://www.abc.net.au/news/feed/6910/rss.xml'),
|
('Canberra', 'http://www.abc.net.au/news/feed/6910/rss.xml'),
|
||||||
('Sydney', 'http://www.abc.net.au/news/feed/10232/rss.xml'),
|
('Sydney', 'http://www.abc.net.au/news/feed/10232/rss.xml'),
|
||||||
('Melbourne', 'http://www.abc.net.au/news/feed/21708/rss.xml'),
|
('Melbourne', 'http://www.abc.net.au/news/feed/21708/rss.xml'),
|
||||||
('Brisbane', 'http://www.abc.net.au/news/feed/12858/rss.xml'),
|
('Brisbane', 'http://www.abc.net.au/news/feed/12858/rss.xml'),
|
||||||
('Perth', 'feed://www.abc.net.au/news/feed/24886/rss.xml'),
|
('Perth', 'feed://www.abc.net.au/news/feed/24886/rss.xml'),
|
||||||
('Australia', 'http://www.abc.net.au/news/feed/46182/rss.xml'),
|
('Australia', 'http://www.abc.net.au/news/feed/46182/rss.xml'),
|
||||||
('World', 'http://www.abc.net.au/news/feed/52278/rss.xml'),
|
('World', 'http://www.abc.net.au/news/feed/52278/rss.xml'),
|
||||||
('Business', 'http://www.abc.net.au/news/feed/51892/rss.xml'),
|
('Business', 'http://www.abc.net.au/news/feed/51892/rss.xml'),
|
||||||
('Science and Technology', 'http://www.abc.net.au/news/feed/2298/rss.xml'),
|
('Science and Technology',
|
||||||
]
|
'http://www.abc.net.au/news/feed/2298/rss.xml'),
|
||||||
|
]
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__author__ = 'Ricardo Jurado'
|
__author__ = 'Ricardo Jurado'
|
||||||
__copyright__ = 'Ricardo Jurado'
|
__copyright__ = 'Ricardo Jurado'
|
||||||
__version__ = 'v0.4'
|
__version__ = 'v0.4'
|
||||||
__date__ = '11 February 2011'
|
__date__ = '11 February 2011'
|
||||||
|
|
||||||
'''
|
'''
|
||||||
http://www.abc.es/
|
http://www.abc.es/
|
||||||
@ -10,16 +10,17 @@ http://www.abc.es/
|
|||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1296604369(BasicNewsRecipe):
|
class AdvancedUserRecipe1296604369(BasicNewsRecipe):
|
||||||
|
|
||||||
title = u'ABC.es'
|
title = u'ABC.es'
|
||||||
masthead_url = 'http://www.abc.es/img/logo-abc.gif'
|
masthead_url = 'http://www.abc.es/img/logo-abc.gif'
|
||||||
cover_url = 'http://www.abc.es/img/logo-abc.gif'
|
cover_url = 'http://www.abc.es/img/logo-abc.gif'
|
||||||
publisher = u'Grupo VOCENTO'
|
publisher = u'Grupo VOCENTO'
|
||||||
|
|
||||||
__author__ = 'Ricardo Jurado'
|
__author__ = 'Ricardo Jurado'
|
||||||
description = 'Noticias de Spain y el mundo'
|
description = 'Noticias de Spain y el mundo'
|
||||||
category = 'News,Spain,National,International,Economy'
|
category = 'News,Spain,National,International,Economy'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 10
|
max_articles_per_feed = 10
|
||||||
|
|
||||||
@ -38,20 +39,21 @@ class AdvancedUserRecipe1296604369(BasicNewsRecipe):
|
|||||||
h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
|
h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
|
||||||
"""
|
"""
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'PORTADA', u'http://www.abc.es/rss/feeds/abcPortada.xml')
|
|
||||||
,(u'ULTIMAS', u'http://www.abc.es/rss/feeds/abc_ultima.xml')
|
(u'PORTADA', u'http://www.abc.es/rss/feeds/abcPortada.xml'),
|
||||||
,(u'NACIONAL', u'http://www.abc.es/rss/feeds/abc_EspanaEspana.xml')
|
(u'ULTIMAS', u'http://www.abc.es/rss/feeds/abc_ultima.xml'),
|
||||||
,(u'INTERNACIONAL', u'http://www.abc.es/rss/feeds/abc_Internacional.xml')
|
(u'NACIONAL', u'http://www.abc.es/rss/feeds/abc_EspanaEspana.xml'),
|
||||||
,(u'OPINION', u'http://www.abc.es/rss/feeds/abc_opinioncompleto.xml')
|
(u'INTERNACIONAL', u'http://www.abc.es/rss/feeds/abc_Internacional.xml'),
|
||||||
,(u'BLOGS ABC', u'http://www.abc.es/rss/feeds/blogs-abc.xml')
|
(u'OPINION', u'http://www.abc.es/rss/feeds/abc_opinioncompleto.xml'),
|
||||||
,(u'ECONOMIA', u'http://www.abc.es/rss/feeds/abc_Economia.xml')
|
(u'BLOGS ABC', u'http://www.abc.es/rss/feeds/blogs-abc.xml'),
|
||||||
,(u'CIENCIA Y TECNOLOGIA', u'http://www.abc.es/rss/feeds/abc_Ciencia_Tecnologia.xml')
|
(u'ECONOMIA', u'http://www.abc.es/rss/feeds/abc_Economia.xml'),
|
||||||
,(u'CULTURA', u'http://www.abc.es/rss/feeds/abc_Cultura.xml')
|
(u'CIENCIA Y TECNOLOGIA', u'http://www.abc.es/rss/feeds/abc_Ciencia_Tecnologia.xml'),
|
||||||
,(u'LIBROS', u'http://www.abc.es/rss/feeds/abc_Libros.xml')
|
(u'CULTURA', u'http://www.abc.es/rss/feeds/abc_Cultura.xml'),
|
||||||
,(u'MEDIOS Y REDES', u'http://www.abc.es/rss/feeds/ABC_Medios_Redes.xml')
|
(u'LIBROS', u'http://www.abc.es/rss/feeds/abc_Libros.xml'),
|
||||||
,(u'EVASION', u'http://www.abc.es/rss/feeds/abc_evasion.xml')
|
(u'MEDIOS Y REDES', u'http://www.abc.es/rss/feeds/ABC_Medios_Redes.xml'),
|
||||||
,(u'ESPECTACULOS', u'http://www.abc.es/rss/feeds/abc_Espectaculos.xml')
|
(u'EVASION', u'http://www.abc.es/rss/feeds/abc_evasion.xml'),
|
||||||
,(u'GENTE', u'http://www.abc.es/rss/feeds/abc_Gente.xml')
|
(u'ESPECTACULOS', u'http://www.abc.es/rss/feeds/abc_Espectaculos.xml'),
|
||||||
,(u'DEPORTES', u'http://www.abc.es/rss/feeds/abc_Deportes.xml')
|
(u'GENTE', u'http://www.abc.es/rss/feeds/abc_Gente.xml'),
|
||||||
]
|
(u'DEPORTES', u'http://www.abc.es/rss/feeds/abc_Deportes.xml')
|
||||||
|
]
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2010-2012, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2010-2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
abc.com.py
|
abc.com.py
|
||||||
@ -6,46 +6,45 @@ abc.com.py
|
|||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class ABC_py(BasicNewsRecipe):
|
class ABC_py(BasicNewsRecipe):
|
||||||
title = 'ABC Color'
|
title = 'ABC Color'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Noticias de Paraguay y el resto del mundo'
|
description = 'Noticias de Paraguay y el resto del mundo'
|
||||||
publisher = 'ABC'
|
publisher = 'ABC'
|
||||||
category = 'news, politics, Paraguay'
|
category = 'news, politics, Paraguay'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 200
|
max_articles_per_feed = 200
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'utf8'
|
encoding = 'utf8'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'es_PY'
|
language = 'es_PY'
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
masthead_url = 'http://www.abc.com.py/plantillas/img/abc-logo.png'
|
masthead_url = 'http://www.abc.com.py/plantillas/img/abc-logo.png'
|
||||||
publication_type = 'newspaper'
|
publication_type = 'newspaper'
|
||||||
extra_css = """
|
extra_css = """
|
||||||
body{font-family: UnitSlabProMedium,"Times New Roman",serif }
|
body{font-family: UnitSlabProMedium,"Times New Roman",serif }
|
||||||
img{margin-bottom: 0.4em; display: block;}
|
img{margin-bottom: 0.4em; display: block;}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||||
, 'tags' : category
|
}
|
||||||
, 'publisher' : publisher
|
|
||||||
, 'language' : language
|
|
||||||
}
|
|
||||||
|
|
||||||
remove_tags = [
|
|
||||||
dict(name=['form','iframe','embed','object','link','base','table']),
|
|
||||||
dict(attrs={'class':['es-carousel-wrapper']}),
|
|
||||||
dict(attrs={'id':['tools','article-banner-1']})
|
|
||||||
]
|
|
||||||
keep_only_tags = [dict(attrs={'id':'article'})]
|
|
||||||
|
|
||||||
|
remove_tags = [
|
||||||
|
dict(name=['form', 'iframe', 'embed',
|
||||||
|
'object', 'link', 'base', 'table']),
|
||||||
|
dict(attrs={'class': ['es-carousel-wrapper']}),
|
||||||
|
dict(attrs={'id': ['tools', 'article-banner-1']})
|
||||||
|
]
|
||||||
|
keep_only_tags = [dict(attrs={'id': 'article'})]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Ultimo momento', u'http://www.abc.com.py/rss.xml' )
|
|
||||||
,(u'Nacionales' , u'http://www.abc.com.py/nacionales/rss.xml' )
|
(u'Ultimo momento', u'http://www.abc.com.py/rss.xml'),
|
||||||
,(u'Mundo' , u'http://www.abc.com.py/internacionales/rss.xml')
|
(u'Nacionales', u'http://www.abc.com.py/nacionales/rss.xml'),
|
||||||
,(u'Deportes' , u'http://www.abc.com.py/deportes/rss.xml' )
|
(u'Mundo', u'http://www.abc.com.py/internacionales/rss.xml'),
|
||||||
,(u'Espectaculos' , u'http://www.abc.com.py/espectaculos/rss.xml' )
|
(u'Deportes', u'http://www.abc.com.py/deportes/rss.xml'),
|
||||||
,(u'TecnoCiencia' , u'http://www.abc.com.py/ciencia/rss.xml' )
|
(u'Espectaculos', u'http://www.abc.com.py/espectaculos/rss.xml'),
|
||||||
]
|
(u'TecnoCiencia', u'http://www.abc.com.py/ciencia/rss.xml')
|
||||||
|
]
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
www.accountancyage.com
|
www.accountancyage.com
|
||||||
@ -8,26 +8,28 @@ www.accountancyage.com
|
|||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class AccountancyAge(BasicNewsRecipe):
|
class AccountancyAge(BasicNewsRecipe):
|
||||||
title = 'Accountancy Age'
|
title = 'Accountancy Age'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'business news'
|
description = 'business news'
|
||||||
publisher = 'accountancyage.com'
|
publisher = 'accountancyage.com'
|
||||||
category = 'news, politics, finances'
|
category = 'news, politics, finances'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
simultaneous_downloads = 1
|
simultaneous_downloads = 1
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
lang = 'en'
|
lang = 'en'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
|
||||||
feeds = [(u'All News', u'http://feeds.accountancyage.com/rss/latest/accountancyage/all')]
|
feeds = [
|
||||||
|
(u'All News', u'http://feeds.accountancyage.com/rss/latest/accountancyage/all')]
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='h1'),
|
dict(name='h1'),
|
||||||
dict(attrs={'class':'article_content'}),
|
dict(attrs={'class': 'article_content'}),
|
||||||
]
|
]
|
||||||
|
|
||||||
def get_article_url(self, article):
|
def get_article_url(self, article):
|
||||||
|
@ -2,26 +2,23 @@
|
|||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1334868409(BasicNewsRecipe):
|
class AdvancedUserRecipe1334868409(BasicNewsRecipe):
|
||||||
title = u'AÇIK BİLİM DERGİSİ'
|
title = u'AÇIK BİLİM DERGİSİ'
|
||||||
description = ' Aylık çevrimiçi bilim dergisi'
|
description = ' Aylık çevrimiçi bilim dergisi'
|
||||||
__author__ = u'thomass'
|
__author__ = u'thomass'
|
||||||
oldest_article = 30
|
oldest_article = 30
|
||||||
max_articles_per_feed = 300
|
max_articles_per_feed = 300
|
||||||
auto_cleanup = True
|
auto_cleanup = True
|
||||||
encoding = 'UTF-8'
|
encoding = 'UTF-8'
|
||||||
publisher = 'açık bilim'
|
publisher = 'açık bilim'
|
||||||
category = 'haber, bilim,TR,dergi'
|
category = 'haber, bilim,TR,dergi'
|
||||||
language = 'tr'
|
language = 'tr'
|
||||||
publication_type = 'magazine '
|
publication_type = 'magazine '
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'tags' : category
|
'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': True
|
||||||
,'language' : language
|
}
|
||||||
,'publisher' : publisher
|
|
||||||
,'linearize_tables': True
|
|
||||||
}
|
|
||||||
cover_img_url = 'http://www.acikbilim.com/wp-content/themes/Equilibrium/images/logodene.jpg'
|
cover_img_url = 'http://www.acikbilim.com/wp-content/themes/Equilibrium/images/logodene.jpg'
|
||||||
masthead_url = 'http://www.acikbilim.com/wp-content/themes/Equilibrium/images/logodene.jpg'
|
masthead_url = 'http://www.acikbilim.com/wp-content/themes/Equilibrium/images/logodene.jpg'
|
||||||
|
|
||||||
|
feeds = [(u'Tüm Yayınlar', u'http://www.acikbilim.com/feed')]
|
||||||
feeds = [(u'Tüm Yayınlar', u'http://www.acikbilim.com/feed')]
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
# vim:fileencoding=utf-8
|
# vim:fileencoding=utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2012'
|
__copyright__ = '2012'
|
||||||
'''
|
'''
|
||||||
acrimed.org
|
acrimed.org
|
||||||
@ -10,19 +10,21 @@ acrimed.org
|
|||||||
import re
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class Acrimed(BasicNewsRecipe):
|
|
||||||
title = u'Acrimed'
|
|
||||||
__author__ = 'Gaëtan Lehmann'
|
|
||||||
oldest_article = 30
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
auto_cleanup = True
|
|
||||||
auto_cleanup_keep = '//div[@class="crayon article-chapo-4112 chapo"]'
|
|
||||||
language = 'fr'
|
|
||||||
masthead_url = 'http://www.acrimed.org/IMG/siteon0.gif'
|
|
||||||
feeds = [(u'Acrimed', u'http://www.acrimed.org/spip.php?page=backend')]
|
|
||||||
|
|
||||||
preprocess_regexps = [
|
class Acrimed(BasicNewsRecipe):
|
||||||
(re.compile(r'<title>(.*) - Acrimed \| Action Critique M.*dias</title>'), lambda m: '<title>' + m.group(1) + '</title>'),
|
title = u'Acrimed'
|
||||||
|
__author__ = 'Gaëtan Lehmann'
|
||||||
|
oldest_article = 30
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
auto_cleanup = True
|
||||||
|
auto_cleanup_keep = '//div[@class="crayon article-chapo-4112 chapo"]'
|
||||||
|
language = 'fr'
|
||||||
|
masthead_url = 'http://www.acrimed.org/IMG/siteon0.gif'
|
||||||
|
feeds = [(u'Acrimed', u'http://www.acrimed.org/spip.php?page=backend')]
|
||||||
|
|
||||||
|
preprocess_regexps = [
|
||||||
|
(re.compile(r'<title>(.*) - Acrimed \| Action Critique M.*dias</title>'),
|
||||||
|
lambda m: '<title>' + m.group(1) + '</title>'),
|
||||||
(re.compile(r'<h2>(.*) - Acrimed \| Action Critique M.*dias</h2>'), lambda m: '<h2>' + m.group(1) + '</h2>')]
|
(re.compile(r'<h2>(.*) - Acrimed \| Action Critique M.*dias</h2>'), lambda m: '<h2>' + m.group(1) + '</h2>')]
|
||||||
|
|
||||||
extra_css = """
|
extra_css = """
|
||||||
|
@ -1,8 +1,9 @@
|
|||||||
import re
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class ADRecipe(BasicNewsRecipe):
|
class ADRecipe(BasicNewsRecipe):
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__author__ = 'kwetal'
|
__author__ = 'kwetal'
|
||||||
language = 'nl'
|
language = 'nl'
|
||||||
country = 'NL'
|
country = 'NL'
|
||||||
@ -22,41 +23,54 @@ class ADRecipe(BasicNewsRecipe):
|
|||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
|
|
||||||
keep_only_tags = []
|
keep_only_tags = []
|
||||||
keep_only_tags.append(dict(name = 'div', attrs = {'id': 'art_box2'}))
|
keep_only_tags.append(dict(name='div', attrs={'id': 'art_box2'}))
|
||||||
keep_only_tags.append(dict(name = 'p', attrs = {'class': 'gen_footnote3'}))
|
keep_only_tags.append(dict(name='p', attrs={'class': 'gen_footnote3'}))
|
||||||
|
|
||||||
remove_tags = []
|
remove_tags = []
|
||||||
remove_tags.append(dict(name = 'div', attrs = {'class': 'gen_clear'}))
|
remove_tags.append(dict(name='div', attrs={'class': 'gen_clear'}))
|
||||||
remove_tags.append(dict(name = 'div', attrs = {'class': re.compile(r'gen_spacer.*')}))
|
remove_tags.append(
|
||||||
|
dict(name='div', attrs={'class': re.compile(r'gen_spacer.*')}))
|
||||||
|
|
||||||
remove_attributes = ['style']
|
remove_attributes = ['style']
|
||||||
|
|
||||||
# feeds from http://ad.nl/ad/nl/1401/home/integration/nmc/frameset/ad_footer/rssFeeds.dhtml
|
# feeds from
|
||||||
|
# http://ad.nl/ad/nl/1401/home/integration/nmc/frameset/ad_footer/rssFeeds.dhtml
|
||||||
feeds = []
|
feeds = []
|
||||||
feeds.append((u'Binnenland', u'http://www.ad.nl/nieuws/binnenland/rss.xml'))
|
feeds.append(
|
||||||
feeds.append((u'Buitenland', u'http://www.ad.nl/nieuws/buitenland/rss.xml'))
|
(u'Binnenland', u'http://www.ad.nl/nieuws/binnenland/rss.xml'))
|
||||||
|
feeds.append(
|
||||||
|
(u'Buitenland', u'http://www.ad.nl/nieuws/buitenland/rss.xml'))
|
||||||
feeds.append((u'Bizar', u'http://www.ad.nl/nieuws/bizar/rss.xml'))
|
feeds.append((u'Bizar', u'http://www.ad.nl/nieuws/bizar/rss.xml'))
|
||||||
feeds.append((u'Gezondheid & Wetenschap', u'http://www.ad.nl/nieuws/gezondheidwetenschap/rss.xml'))
|
feeds.append((u'Gezondheid & Wetenschap',
|
||||||
|
u'http://www.ad.nl/nieuws/gezondheidwetenschap/rss.xml'))
|
||||||
feeds.append((u'Economie', u'http://www.ad.nl/nieuws/economie/rss.xml'))
|
feeds.append((u'Economie', u'http://www.ad.nl/nieuws/economie/rss.xml'))
|
||||||
feeds.append((u'Nederlands Voetbal', u'http://www.ad.nl/sportwereld/nederlandsvoetbal/rss.xml'))
|
feeds.append((u'Nederlands Voetbal',
|
||||||
feeds.append((u'Buitenlands Voetbal', u'http://www.ad.nl/sportwereld/buitenlandsvoetbal/rss.xml'))
|
u'http://www.ad.nl/sportwereld/nederlandsvoetbal/rss.xml'))
|
||||||
feeds.append((u'Champions League/Europa League', u'http://www.ad.nl/sportwereld/championsleagueeuropaleague/rss.xml'))
|
feeds.append((u'Buitenlands Voetbal',
|
||||||
feeds.append((u'Wielrennen', u'http://www.ad.nl/sportwereld/wielrennen/rss.xml'))
|
u'http://www.ad.nl/sportwereld/buitenlandsvoetbal/rss.xml'))
|
||||||
|
feeds.append((u'Champions League/Europa League',
|
||||||
|
u'http://www.ad.nl/sportwereld/championsleagueeuropaleague/rss.xml'))
|
||||||
|
feeds.append(
|
||||||
|
(u'Wielrennen', u'http://www.ad.nl/sportwereld/wielrennen/rss.xml'))
|
||||||
feeds.append((u'Tennis', u'http://www.ad.nl/sportwereld/tennis/rss.xml'))
|
feeds.append((u'Tennis', u'http://www.ad.nl/sportwereld/tennis/rss.xml'))
|
||||||
feeds.append((u'Formule 1', u'http://www.ad.nl/sportwereld/formule1/rss.xml'))
|
feeds.append(
|
||||||
feeds.append((u'Meer Sport', u'http://www.ad.nl/sportwereld/meersport/rss.xml'))
|
(u'Formule 1', u'http://www.ad.nl/sportwereld/formule1/rss.xml'))
|
||||||
|
feeds.append(
|
||||||
|
(u'Meer Sport', u'http://www.ad.nl/sportwereld/meersport/rss.xml'))
|
||||||
feeds.append((u'Celebs', u'http://www.ad.nl/showbizz/celebs/rss.xml'))
|
feeds.append((u'Celebs', u'http://www.ad.nl/showbizz/celebs/rss.xml'))
|
||||||
feeds.append((u'Film', u'http://www.ad.nl/showbizz/film/rss.xml'))
|
feeds.append((u'Film', u'http://www.ad.nl/showbizz/film/rss.xml'))
|
||||||
feeds.append((u'Muziek', u'http://www.ad.nl/showbizz/muziek/rss.xml'))
|
feeds.append((u'Muziek', u'http://www.ad.nl/showbizz/muziek/rss.xml'))
|
||||||
feeds.append((u'TV', u'http://www.ad.nl/showbizz/tv/rss.xml'))
|
feeds.append((u'TV', u'http://www.ad.nl/showbizz/tv/rss.xml'))
|
||||||
feeds.append((u'Kunst & Literatuur', u'http://www.ad.nl/showbizz/kunstenliteratuur/rss.xml'))
|
feeds.append((u'Kunst & Literatuur',
|
||||||
|
u'http://www.ad.nl/showbizz/kunstenliteratuur/rss.xml'))
|
||||||
feeds.append((u'Jouw Wereld', u'http://www.ad.nl/you/rss.xml'))
|
feeds.append((u'Jouw Wereld', u'http://www.ad.nl/you/rss.xml'))
|
||||||
feeds.append((u'Consument', u'http://www.ad.nl/consument/rss.xml'))
|
feeds.append((u'Consument', u'http://www.ad.nl/consument/rss.xml'))
|
||||||
feeds.append((u'Autowereld', u'http://www.ad.nl/autowereld/rss.xml'))
|
feeds.append((u'Autowereld', u'http://www.ad.nl/autowereld/rss.xml'))
|
||||||
feeds.append((u'Reiswereld', u'http://www.ad.nl/reiswereld/rss.xml'))
|
feeds.append((u'Reiswereld', u'http://www.ad.nl/reiswereld/rss.xml'))
|
||||||
feeds.append((u'Internet', u'http://www.ad.nl/digitaal/internet/rss.xml'))
|
feeds.append((u'Internet', u'http://www.ad.nl/digitaal/internet/rss.xml'))
|
||||||
feeds.append((u'Games', u'http://www.ad.nl/digitaal/games/rss.xml'))
|
feeds.append((u'Games', u'http://www.ad.nl/digitaal/games/rss.xml'))
|
||||||
feeds.append((u'Multimedia', u'http://www.ad.nl/digitaal/multimedia/rss.xml'))
|
feeds.append(
|
||||||
|
(u'Multimedia', u'http://www.ad.nl/digitaal/multimedia/rss.xml'))
|
||||||
feeds.append((u'Planet Watch', u'http://www.ad.nl/planetwatch/rss.xml'))
|
feeds.append((u'Planet Watch', u'http://www.ad.nl/planetwatch/rss.xml'))
|
||||||
|
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
@ -71,7 +85,8 @@ class ADRecipe(BasicNewsRecipe):
|
|||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
parts = url.split('/')
|
parts = url.split('/')
|
||||||
print_url = 'http://' + parts[2] + '/' + parts[3] + '/' + parts[4] + '/' + parts[5] + '/' \
|
print_url = 'http://' + parts[2] + '/' + parts[3] + '/' + parts[4] + '/' + parts[5] + '/' \
|
||||||
+ parts[10] + '/' + parts[7] + '/print/' + parts[8] + '/' + parts[9] + '/' + parts[13]
|
+ parts[10] + '/' + parts[7] + '/print/' + \
|
||||||
|
parts[8] + '/' + parts[9] + '/' + parts[13]
|
||||||
|
|
||||||
return print_url
|
return print_url
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||||
'''
|
'''
|
||||||
adevarul.ro
|
adevarul.ro
|
||||||
@ -9,51 +9,38 @@ adevarul.ro
|
|||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class Adevarul(BasicNewsRecipe):
|
class Adevarul(BasicNewsRecipe):
|
||||||
title = u'Adev\u0103rul'
|
title = u'Adev\u0103rul'
|
||||||
language = 'ro'
|
language = 'ro'
|
||||||
__author__ = u'Silviu Cotoar\u0103'
|
__author__ = u'Silviu Cotoar\u0103'
|
||||||
description = u'\u0218tiri din Rom\u00e2nia'
|
description = u'\u0218tiri din Rom\u00e2nia'
|
||||||
publisher = 'Adevarul'
|
publisher = 'Adevarul'
|
||||||
category = 'Ziare,Stiri,Romania'
|
category = 'Ziare,Stiri,Romania'
|
||||||
oldest_article = 5
|
oldest_article = 5
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
cover_url = 'http://upload.wikimedia.org/wikipedia/en/d/d6/Logo_noul_adevarul.png'
|
cover_url = 'http://upload.wikimedia.org/wikipedia/en/d/d6/Logo_noul_adevarul.png'
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comments' : description
|
'comments': description, 'tags': category, 'language': language, 'publisher': publisher
|
||||||
,'tags' : category
|
}
|
||||||
,'language' : language
|
|
||||||
,'publisher' : publisher
|
|
||||||
}
|
|
||||||
|
|
||||||
keep_only_tags = [ dict(name='div', attrs={'class':'article_header'})
|
|
||||||
,dict(name='div', attrs={'class':'bb-tu first-t bb-article-body'})
|
|
||||||
]
|
|
||||||
|
|
||||||
|
keep_only_tags = [dict(name='div', attrs={'class': 'article_header'}), dict(name='div', attrs={'class': 'bb-tu first-t bb-article-body'})
|
||||||
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='li', attrs={'class':'author'})
|
dict(name='li', attrs={'class': 'author'}), dict(name='li', attrs={'class': 'date'}), dict(name='li', attrs={'class': 'comments'}), dict(name='div', attrs={'class': 'bb-wg-article_related_attachements'}), dict(name='div', attrs={'class': 'bb-md bb-md-article_comments'}), dict(name='form', attrs={'id': 'bb-comment-create-form'}), dict(name='div', attrs={'id': 'mediatag'}), dict(name='div', attrs={'id': 'ft'}), dict(name='div', attrs={'id': 'comment_wrapper'}) # noqa
|
||||||
,dict(name='li', attrs={'class':'date'})
|
]
|
||||||
,dict(name='li', attrs={'class':'comments'})
|
|
||||||
,dict(name='div', attrs={'class':'bb-wg-article_related_attachements'})
|
|
||||||
,dict(name='div', attrs={'class':'bb-md bb-md-article_comments'})
|
|
||||||
,dict(name='form', attrs={'id':'bb-comment-create-form'})
|
|
||||||
,dict(name='div', attrs={'id':'mediatag'})
|
|
||||||
,dict(name='div', attrs={'id':'ft'})
|
|
||||||
,dict(name='div', attrs={'id':'comment_wrapper'})
|
|
||||||
]
|
|
||||||
|
|
||||||
remove_tags_after = [
|
remove_tags_after = [
|
||||||
dict(name='div', attrs={'id':'comment_wrapper'}),
|
dict(name='div', attrs={'id': 'comment_wrapper'}),
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [ (u'\u0218tiri', u'http://www.adevarul.ro/rss/latest') ]
|
feeds = [(u'\u0218tiri', u'http://www.adevarul.ro/rss/latest')]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
return self.adeify_images(soup)
|
return self.adeify_images(soup)
|
||||||
|
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__author__ = 'Gabriele Marini, based on Darko Miletic'
|
__author__ = 'Gabriele Marini, based on Darko Miletic'
|
||||||
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
description = 'Italian daily newspaper - 02-05-2010'
|
description = 'Italian daily newspaper - 02-05-2010'
|
||||||
|
|
||||||
'''
|
'''
|
||||||
http://www.adnkronos.com/
|
http://www.adnkronos.com/
|
||||||
@ -10,50 +10,49 @@ http://www.adnkronos.com/
|
|||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class Adnkronos(BasicNewsRecipe):
|
|
||||||
__author__ = 'Gabriele Marini'
|
|
||||||
description = 'News agency'
|
|
||||||
cover_url = 'http://www.adnkronos.com/IGN6/img/popup_ign.jpg'
|
|
||||||
title = u'Adnkronos'
|
|
||||||
publisher = 'Adnkronos Group - ews agency'
|
|
||||||
category = 'News, politics, culture, economy, general interest'
|
|
||||||
|
|
||||||
language = 'it'
|
class Adnkronos(BasicNewsRecipe):
|
||||||
timefmt = '[%a, %d %b, %Y]'
|
__author__ = 'Gabriele Marini'
|
||||||
|
description = 'News agency'
|
||||||
|
cover_url = 'http://www.adnkronos.com/IGN6/img/popup_ign.jpg'
|
||||||
|
title = u'Adnkronos'
|
||||||
|
publisher = 'Adnkronos Group - ews agency'
|
||||||
|
category = 'News, politics, culture, economy, general interest'
|
||||||
|
|
||||||
|
language = 'it'
|
||||||
|
timefmt = '[%a, %d %b, %Y]'
|
||||||
|
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 80
|
max_articles_per_feed = 80
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
recursion = 10
|
recursion = 10
|
||||||
|
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
|
|
||||||
def get_article_url(self, article):
|
def get_article_url(self, article):
|
||||||
link = article.get('id', article.get('guid', None))
|
link = article.get('id', article.get('guid', None))
|
||||||
return link
|
return link
|
||||||
|
|
||||||
extra_css = ' .newsAbstract{font-style: italic} '
|
extra_css = ' .newsAbstract{font-style: italic} '
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':['breadCrumbs','newsTop','newsText']})
|
keep_only_tags = [dict(name='div', attrs={'class': ['breadCrumbs', 'newsTop', 'newsText']})
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
remove_tags = [
|
|
||||||
dict(name='div', attrs={'class':['leogoo','leogoo2']})
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
(u'Prima Pagina', u'http://rss.adnkronos.com/RSS_PrimaPagina.xml'),
|
|
||||||
(u'Ultima Ora', u'http://rss.adnkronos.com/RSS_Ultimora.xml'),
|
|
||||||
(u'Politica', u'http://rss.adnkronos.com/RSS_Politica.xml'),
|
|
||||||
(u'Esteri', u'http://rss.adnkronos.com/RSS_Esteri.xml'),
|
|
||||||
(u'Cronoca', u'http://rss.adnkronos.com/RSS_Cronaca.xml'),
|
|
||||||
(u'Economia', u'http://rss.adnkronos.com/RSS_Economia.xml'),
|
|
||||||
(u'Finanza', u'http://rss.adnkronos.com/RSS_Finanza.xml'),
|
|
||||||
(u'CyberNews', u'http://rss.adnkronos.com/RSS_CyberNews.xml'),
|
|
||||||
(u'Spettacolo', u'http://rss.adnkronos.com/RSS_Spettacolo.xml'),
|
|
||||||
(u'Cultura', u'http://rss.adnkronos.com/RSS_Cultura.xml'),
|
|
||||||
(u'Sport', u'http://rss.adnkronos.com/RSS_Sport.xml'),
|
|
||||||
(u'Sostenibilita', u'http://rss.adnkronos.com/RSS_Sostenibilita.xml'),
|
|
||||||
(u'Salute', u'http://rss.adnkronos.com/RSS_Salute.xml')
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
remove_tags = [
|
||||||
|
dict(name='div', attrs={'class': ['leogoo', 'leogoo2']})
|
||||||
|
]
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
(u'Prima Pagina', u'http://rss.adnkronos.com/RSS_PrimaPagina.xml'),
|
||||||
|
(u'Ultima Ora', u'http://rss.adnkronos.com/RSS_Ultimora.xml'),
|
||||||
|
(u'Politica', u'http://rss.adnkronos.com/RSS_Politica.xml'),
|
||||||
|
(u'Esteri', u'http://rss.adnkronos.com/RSS_Esteri.xml'),
|
||||||
|
(u'Cronoca', u'http://rss.adnkronos.com/RSS_Cronaca.xml'),
|
||||||
|
(u'Economia', u'http://rss.adnkronos.com/RSS_Economia.xml'),
|
||||||
|
(u'Finanza', u'http://rss.adnkronos.com/RSS_Finanza.xml'),
|
||||||
|
(u'CyberNews', u'http://rss.adnkronos.com/RSS_CyberNews.xml'),
|
||||||
|
(u'Spettacolo', u'http://rss.adnkronos.com/RSS_Spettacolo.xml'),
|
||||||
|
(u'Cultura', u'http://rss.adnkronos.com/RSS_Cultura.xml'),
|
||||||
|
(u'Sport', u'http://rss.adnkronos.com/RSS_Sport.xml'),
|
||||||
|
(u'Sostenibilita', u'http://rss.adnkronos.com/RSS_Sostenibilita.xml'),
|
||||||
|
(u'Salute', u'http://rss.adnkronos.com/RSS_Salute.xml')
|
||||||
|
]
|
||||||
|
@ -1,26 +1,26 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1336986047(BasicNewsRecipe):
|
class AdvancedUserRecipe1336986047(BasicNewsRecipe):
|
||||||
title = u'Ads of the World'
|
title = u'Ads of the World'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
auto_cleanup = False
|
auto_cleanup = False
|
||||||
description = 'The best international advertising campaigns'
|
description = 'The best international advertising campaigns'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
__author__ = 'faber1971'
|
__author__ = 'faber1971'
|
||||||
|
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'id':'primary'})
|
dict(name='div', attrs={'id': 'primary'})
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='ul', attrs={'class':'links inline'})
|
dict(name='ul', attrs={'class': 'links inline'}), dict(name='div', attrs={'class': 'form-item'}), dict(
|
||||||
,dict(name='div', attrs={'class':'form-item'})
|
name='div', attrs={'id': ['options', 'comments']}), dict(name='ul', attrs={'id': 'nodePager'})
|
||||||
,dict(name='div', attrs={'id':['options', 'comments']})
|
]
|
||||||
,dict(name='ul', attrs={'id':'nodePager'})
|
|
||||||
]
|
|
||||||
|
|
||||||
reverse_article_order = True
|
reverse_article_order = True
|
||||||
masthead_url = 'http://bigcatgroup.co.uk/files/2011/01/05-ads-of-the-world.png'
|
masthead_url = 'http://bigcatgroup.co.uk/files/2011/01/05-ads-of-the-world.png'
|
||||||
feeds = [(u'Ads of the world', u'http://feeds.feedburner.com/adsoftheworld-latest')]
|
feeds = [
|
||||||
|
(u'Ads of the world', u'http://feeds.feedburner.com/adsoftheworld-latest')]
|
||||||
|
@ -1,10 +1,12 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class Adventure_zone(BasicNewsRecipe):
|
class Adventure_zone(BasicNewsRecipe):
|
||||||
title = u'Adventure Zone'
|
title = u'Adventure Zone'
|
||||||
__author__ = 'fenuks'
|
__author__ = 'fenuks'
|
||||||
description = u'Czytaj więcej o przygodzie - codzienne nowinki. Szukaj u nas solucji i poradników, czytaj recenzje i zapowiedzi. Także galeria, pliki oraz forum dla wszystkich fanów gier przygodowych.'
|
description = u'Czytaj więcej o przygodzie - codzienne nowinki. Szukaj u nas solucji i poradników, czytaj recenzje i zapowiedzi. Także galeria, pliki oraz forum dla wszystkich fanów gier przygodowych.' # noqa
|
||||||
category = 'games'
|
category = 'games'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
BASEURL = 'http://www.adventure-zone.info/fusion/'
|
BASEURL = 'http://www.adventure-zone.info/fusion/'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
extra_css = '.image {float: left; margin-right: 5px;}'
|
extra_css = '.image {float: left; margin-right: 5px;}'
|
||||||
@ -13,20 +15,20 @@ class Adventure_zone(BasicNewsRecipe):
|
|||||||
cover_url = 'http://www.adventure-zone.info/inne/logoaz_2012.png'
|
cover_url = 'http://www.adventure-zone.info/inne/logoaz_2012.png'
|
||||||
remove_attributes = ['style']
|
remove_attributes = ['style']
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
keep_only_tags = [dict(attrs={'class':'content'})]
|
keep_only_tags = [dict(attrs={'class': 'content'})]
|
||||||
remove_tags = [dict(attrs={'class':'footer'})]
|
remove_tags = [dict(attrs={'class': 'footer'})]
|
||||||
feeds = [(u'Nowinki', u'http://www.adventure-zone.info/fusion/rss/index.php')]
|
feeds = [(u'Nowinki', u'http://www.adventure-zone.info/fusion/rss/index.php')]
|
||||||
|
|
||||||
def skip_ad_pages(self, soup):
|
def skip_ad_pages(self, soup):
|
||||||
skip_tag = soup.body.find(attrs={'class':'content'})
|
skip_tag = soup.body.find(attrs={'class': 'content'})
|
||||||
skip_tag = skip_tag.findAll(name='a')
|
skip_tag = skip_tag.findAll(name='a')
|
||||||
title = soup.title.string.lower()
|
title = soup.title.string.lower()
|
||||||
if (('zapowied' in title) or ('recenzj' in title) or ('solucj' in title) or ('poradnik' in title)):
|
if (('zapowied' in title) or ('recenzj' in title) or ('solucj' in title) or ('poradnik' in title)):
|
||||||
for r in skip_tag:
|
for r in skip_tag:
|
||||||
if r.strong and r.strong.string:
|
if r.strong and r.strong.string:
|
||||||
word=r.strong.string.lower()
|
word = r.strong.string.lower()
|
||||||
if (('zapowied' in word) or ('recenzj' in word) or ('solucj' in word) or ('poradnik' in word)):
|
if (('zapowied' in word) or ('recenzj' in word) or ('solucj' in word) or ('poradnik' in word)):
|
||||||
return self.index_to_soup(self.BASEURL+r['href'], raw=True)
|
return self.index_to_soup(self.BASEURL + r['href'], raw=True)
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
for link in soup.findAll('a', href=True):
|
for link in soup.findAll('a', href=True):
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009-2012, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009-2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
www.adventuregamers.com
|
www.adventuregamers.com
|
||||||
@ -6,21 +6,21 @@ www.adventuregamers.com
|
|||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class AdventureGamers(BasicNewsRecipe):
|
class AdventureGamers(BasicNewsRecipe):
|
||||||
title = u'Adventure Gamers'
|
title = u'Adventure Gamers'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Adventure games portal'
|
description = 'Adventure games portal'
|
||||||
publisher = 'Adventure Gamers'
|
publisher = 'Adventure Gamers'
|
||||||
category = 'news, games, adventure, technology'
|
category = 'news, games, adventure, technology'
|
||||||
oldest_article = 10
|
oldest_article = 10
|
||||||
#delay = 10
|
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'utf8'
|
encoding = 'utf8'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
INDEX = u'http://www.adventuregamers.com'
|
INDEX = u'http://www.adventuregamers.com'
|
||||||
extra_css = """
|
extra_css = """
|
||||||
.pageheader_type{font-size: x-large; font-weight: bold; color: #828D74}
|
.pageheader_type{font-size: x-large; font-weight: bold; color: #828D74}
|
||||||
.pageheader_title,.page_title{font-size: xx-large; color: #394128}
|
.pageheader_title,.page_title{font-size: xx-large; color: #394128}
|
||||||
@ -29,59 +29,54 @@ class AdventureGamers(BasicNewsRecipe):
|
|||||||
.score_column_1{ padding-left: 10px; font-size: small; width: 50%}
|
.score_column_1{ padding-left: 10px; font-size: small; width: 50%}
|
||||||
.score_column_2{ padding-left: 10px; font-size: small; width: 50%}
|
.score_column_2{ padding-left: 10px; font-size: small; width: 50%}
|
||||||
.score_column_3{ padding-left: 10px; font-size: small; width: 50%}
|
.score_column_3{ padding-left: 10px; font-size: small; width: 50%}
|
||||||
.score_header{font-size: large; color: #50544A}
|
.score_header{font-size: large; color: #50544A}
|
||||||
img{margin-bottom: 1em;}
|
img{margin-bottom: 1em;}
|
||||||
body{font-family: 'Open Sans',Helvetica,Arial,sans-serif}
|
body{font-family: 'Open Sans',Helvetica,Arial,sans-serif}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||||
, 'tags' : category
|
}
|
||||||
, 'publisher' : publisher
|
|
||||||
, 'language' : language
|
|
||||||
}
|
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':'cleft_inn'})]
|
keep_only_tags = [dict(name='div', attrs={'class': 'cleft_inn'})]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['object','link','embed','form','iframe','meta'])
|
dict(name=['object', 'link', 'embed', 'form', 'iframe', 'meta']), dict(name='a', attrs={
|
||||||
,dict(name='a', attrs={'href':'http://www.adventuregamers.com/about/scoring'})
|
'href': 'http://www.adventuregamers.com/about/scoring'}), dict(name='a', attrs={'href': 'http://www.adventuregamers.com/about/policies'})
|
||||||
,dict(name='a', attrs={'href':'http://www.adventuregamers.com/about/policies'})
|
]
|
||||||
]
|
remove_tags_after = [dict(name='div', attrs={'class': 'bodytext'})]
|
||||||
remove_tags_after = [dict(name='div', attrs={'class':'bodytext'})]
|
remove_attributes = ['width', 'height']
|
||||||
remove_attributes = ['width','height']
|
|
||||||
|
|
||||||
feeds = [(u'Articles', u'http://www.adventuregamers.com/rss/')]
|
feeds = [(u'Articles', u'http://www.adventuregamers.com/rss/')]
|
||||||
|
|
||||||
def get_article_url(self, article):
|
def get_article_url(self, article):
|
||||||
url = BasicNewsRecipe.get_article_url(self, article)
|
url = BasicNewsRecipe.get_article_url(self, article)
|
||||||
if '/videos/' in url or '/hypeometer/' in url:
|
if '/videos/' in url or '/hypeometer/' in url:
|
||||||
return None
|
return None
|
||||||
return url
|
return url
|
||||||
|
|
||||||
def append_page(self, soup, appendtag, position):
|
def append_page(self, soup, appendtag, position):
|
||||||
pager = soup.find('div', attrs={'class':'pagination_big'})
|
pager = soup.find('div', attrs={'class': 'pagination_big'})
|
||||||
if pager:
|
if pager:
|
||||||
nextpage = soup.find('a', attrs={'class':'next-page'})
|
nextpage = soup.find('a', attrs={'class': 'next-page'})
|
||||||
if nextpage:
|
if nextpage:
|
||||||
nexturl = nextpage['href']
|
nexturl = nextpage['href']
|
||||||
soup2 = self.index_to_soup(nexturl)
|
soup2 = self.index_to_soup(nexturl)
|
||||||
texttag = soup2.find('div', attrs={'class':'bodytext'})
|
texttag = soup2.find('div', attrs={'class': 'bodytext'})
|
||||||
for it in texttag.findAll(style=True):
|
for it in texttag.findAll(style=True):
|
||||||
del it['style']
|
del it['style']
|
||||||
newpos = len(texttag.contents)
|
newpos = len(texttag.contents)
|
||||||
self.append_page(soup2,texttag,newpos)
|
self.append_page(soup2, texttag, newpos)
|
||||||
texttag.extract()
|
texttag.extract()
|
||||||
pager.extract()
|
pager.extract()
|
||||||
appendtag.insert(position,texttag)
|
appendtag.insert(position, texttag)
|
||||||
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
for item in soup.findAll('div', attrs={'class':'floatright'}):
|
for item in soup.findAll('div', attrs={'class': 'floatright'}):
|
||||||
item.extract()
|
item.extract()
|
||||||
self.append_page(soup, soup.body, 3)
|
self.append_page(soup, soup.body, 3)
|
||||||
pager = soup.find('div',attrs={'class':'pagination_big'})
|
pager = soup.find('div', attrs={'class': 'pagination_big'})
|
||||||
if pager:
|
if pager:
|
||||||
pager.extract()
|
pager.extract()
|
||||||
return self.adeify_images(soup)
|
return self.adeify_images(soup)
|
||||||
|
@ -1,20 +1,20 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class Aftenposten(BasicNewsRecipe):
|
class Aftenposten(BasicNewsRecipe):
|
||||||
title = u'Aftenposten'
|
title = u'Aftenposten'
|
||||||
__author__ = 'davotibarna'
|
__author__ = 'davotibarna'
|
||||||
description = 'Norske nyheter'
|
description = 'Norske nyheter'
|
||||||
language = 'no'
|
language = 'no'
|
||||||
oldest_article = 5
|
oldest_article = 5
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
recipe_disabled = ('The recipe to download Aftenposten has been '
|
recipe_disabled = ('The recipe to download Aftenposten has been '
|
||||||
'temporarily disabled at the publisher\'s request, while '
|
'temporarily disabled at the publisher\'s request, while '
|
||||||
'they finalize their digital strategy.')
|
'they finalize their digital strategy.')
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'ISO-8859-1'
|
encoding = 'ISO-8859-1'
|
||||||
|
|
||||||
feeds = [(u'Aftenposten', u'http://www.aftenposten.no/eksport/rss-1_0/')]
|
feeds = [(u'Aftenposten', u'http://www.aftenposten.no/eksport/rss-1_0/')]
|
||||||
|
|
||||||
def print_version(self, url):
|
|
||||||
return url.replace('#xtor=RSS-3', '?service=print')
|
|
||||||
|
|
||||||
|
def print_version(self, url):
|
||||||
|
return url.replace('#xtor=RSS-3', '?service=print')
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
boljevac.blogspot.com
|
boljevac.blogspot.com
|
||||||
@ -8,25 +8,23 @@ boljevac.blogspot.com
|
|||||||
import re
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class AgroGerila(BasicNewsRecipe):
|
class AgroGerila(BasicNewsRecipe):
|
||||||
title = 'Agro Gerila'
|
title = 'Agro Gerila'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Politicki nekorektan blog.'
|
description = 'Politicki nekorektan blog.'
|
||||||
oldest_article = 45
|
oldest_article = 45
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
language = 'sr'
|
language = 'sr'
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = True
|
use_embedded_content = True
|
||||||
publication_type = 'blog'
|
publication_type = 'blog'
|
||||||
extra_css = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: "Trebuchet MS",Trebuchet,Verdana,sans1,sans-serif} .article_description{font-family: sans1, sans-serif} img{margin-bottom: 0.8em; border: 1px solid #333333; padding: 4px } '
|
extra_css = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: "Trebuchet MS",Trebuchet,Verdana,sans1,sans-serif} .article_description{font-family: sans1, sans-serif} img{margin-bottom: 0.8em; border: 1px solid #333333; padding: 4px } ' # noqa
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment': description, 'tags': 'film, blog, srbija', 'publisher': 'Dry-Na-Nord', 'language': language
|
||||||
, 'tags' : 'film, blog, srbija'
|
}
|
||||||
, 'publisher': 'Dry-Na-Nord'
|
|
||||||
, 'language' : language
|
|
||||||
}
|
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
@ -36,5 +34,3 @@ class AgroGerila(BasicNewsRecipe):
|
|||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
return self.adeify_images(soup)
|
return self.adeify_images(soup)
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2010 - 2014, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2010 - 2014, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
www.aif.ru
|
www.aif.ru
|
||||||
@ -6,35 +6,32 @@ www.aif.ru
|
|||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class AIF_ru(BasicNewsRecipe):
|
class AIF_ru(BasicNewsRecipe):
|
||||||
title = 'Arguments & Facts - Russian'
|
title = 'Arguments & Facts - Russian'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'News from Russia'
|
description = 'News from Russia'
|
||||||
publisher = 'AIF'
|
publisher = 'AIF'
|
||||||
category = 'news, politics, Russia'
|
category = 'news, politics, Russia'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'utf8'
|
encoding = 'utf8'
|
||||||
language = 'ru'
|
language = 'ru'
|
||||||
publication_type = 'magazine'
|
publication_type = 'magazine'
|
||||||
masthead_url = 'http://static3.aif.ru/glossy/index/i/logo.png'
|
masthead_url = 'http://static3.aif.ru/glossy/index/i/logo.png'
|
||||||
extra_css = """
|
extra_css = """
|
||||||
body{font-family: Verdana,Arial,Helvetica,sans1,sans-serif}
|
body{font-family: Verdana,Arial,Helvetica,sans1,sans-serif}
|
||||||
img{display: block}
|
img{display: block}
|
||||||
"""
|
"""
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='h1', attrs={'class':'title'})
|
dict(name='h1', attrs={'class': 'title'}), dict(name='div', attrs={'class': 'prew_tags'}), dict(
|
||||||
,dict(name='div', attrs={'class':'prew_tags'})
|
name='article', attrs={'class': lambda x: x and 'articl_body' in x.split()})
|
||||||
,dict(name='article', attrs={'class':lambda x: x and 'articl_body' in x.split()})
|
]
|
||||||
]
|
remove_tags = [
|
||||||
remove_tags = [
|
dict(name=['iframe', 'object', 'link', 'base', 'input', 'meta']), dict(name='div', attrs={'class': 'in-topic'}), dict(name='div', attrs={
|
||||||
dict(name=['iframe','object','link','base','input','meta'])
|
'class': lambda x: x and 'related_article' in x.split()}), dict(name='div', attrs={'class': lambda x: x and 'articl_tag' in x.split()})
|
||||||
,dict(name='div',attrs={'class':'in-topic'})
|
]
|
||||||
,dict(name='div', attrs={'class':lambda x: x and 'related_article' in x.split()})
|
|
||||||
,dict(name='div', attrs={'class':lambda x: x and 'articl_tag' in x.split()})
|
|
||||||
]
|
|
||||||
|
|
||||||
feeds = [(u'News', u'http://www.aif.ru/rss/all.php')]
|
|
||||||
|
|
||||||
|
feeds = [(u'News', u'http://www.aif.ru/rss/all.php')]
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class AirForceTimes(BasicNewsRecipe):
|
class AirForceTimes(BasicNewsRecipe):
|
||||||
title = 'Air Force Times'
|
title = 'Air Force Times'
|
||||||
__author__ = 'jde'
|
__author__ = 'jde'
|
||||||
@ -12,7 +13,7 @@ class AirForceTimes(BasicNewsRecipe):
|
|||||||
tags = 'news, U.S. Air Force'
|
tags = 'news, U.S. Air Force'
|
||||||
cover_url = 'http://www.airforcetimes.com/images/logo_airforcetimes_alert.jpg'
|
cover_url = 'http://www.airforcetimes.com/images/logo_airforcetimes_alert.jpg'
|
||||||
masthead_url = 'http://www.airforcetimes.com/images/logo_airforcetimes_alert.jpg'
|
masthead_url = 'http://www.airforcetimes.com/images/logo_airforcetimes_alert.jpg'
|
||||||
oldest_article = 7 #days
|
oldest_article = 7 # days
|
||||||
max_articles_per_feed = 25
|
max_articles_per_feed = 25
|
||||||
publication_type = 'newspaper'
|
publication_type = 'newspaper'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
@ -24,20 +25,14 @@ class AirForceTimes(BasicNewsRecipe):
|
|||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
auto_cleanup = True
|
auto_cleanup = True
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
|
|
||||||
('News', 'http://www.airforcetimes.com/rss_news.php'),
|
('News', 'http://www.airforcetimes.com/rss_news.php'),
|
||||||
('Benefits', 'http://www.airforcetimes.com/rss_benefits.php'),
|
('Benefits', 'http://www.airforcetimes.com/rss_benefits.php'),
|
||||||
('Money', 'http://www.airforcetimes.com/rss_money.php'),
|
('Money', 'http://www.airforcetimes.com/rss_money.php'),
|
||||||
('Careers & Education', 'http://www.airforcetimes.com/rss_careers.php'),
|
('Careers & Education', 'http://www.airforcetimes.com/rss_careers.php'),
|
||||||
('Community', 'http://www.airforcetimes.com/rss_community.php'),
|
('Community', 'http://www.airforcetimes.com/rss_community.php'),
|
||||||
('Off Duty', 'http://www.airforcetimes.com/rss_off_duty.php'),
|
('Off Duty', 'http://www.airforcetimes.com/rss_off_duty.php'),
|
||||||
('Entertainment', 'http://www.airforcetimes.com/rss_entertainment.php'),
|
('Entertainment', 'http://www.airforcetimes.com/rss_entertainment.php'),
|
||||||
('Guard & Reserve', 'http://www.airforcetimes.com/rss_guard.php'),
|
('Guard & Reserve', 'http://www.airforcetimes.com/rss_guard.php'),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,15 +1,17 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
__license__ = 'Creative Commons Attribution 4.0 International License'
|
__license__ = 'Creative Commons Attribution 4.0 International License'
|
||||||
__author__ = 'John McDole'
|
__author__ = 'John McDole'
|
||||||
__copyright__ = ''
|
__copyright__ = ''
|
||||||
__version__ = '0.1'
|
__version__ = '0.1'
|
||||||
__date__ = '2015/01/10'
|
__date__ = '2015/01/10'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import datetime, re
|
import datetime
|
||||||
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import Tag
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1282101454(BasicNewsRecipe):
|
class AdvancedUserRecipe1282101454(BasicNewsRecipe):
|
||||||
now = datetime.datetime.now()
|
now = datetime.datetime.now()
|
||||||
title = 'The AJC'
|
title = 'The AJC'
|
||||||
@ -24,72 +26,81 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
|
|||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
|
||||||
# The AJC lists identical articles in multiple feeds; this removes them based on their URL
|
# The AJC lists identical articles in multiple feeds; this removes them
|
||||||
|
# based on their URL
|
||||||
ignore_duplicate_articles = {'title', 'url'}
|
ignore_duplicate_articles = {'title', 'url'}
|
||||||
|
|
||||||
# And this says "Hey, AJC, different feeds should mean something!"
|
# And this says "Hey, AJC, different feeds should mean something!"
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
|
|
||||||
# Sets whether a feed has full articles embedded in it. The AJC feeds do not.
|
# Sets whether a feed has full articles embedded in it. The AJC feeds do
|
||||||
|
# not.
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
|
||||||
masthead_url = 'http://gawand.org/wp-content/uploads/2010/06/ajc-logo.gif'
|
masthead_url = 'http://gawand.org/wp-content/uploads/2010/06/ajc-logo.gif'
|
||||||
|
|
||||||
# Pick your poison. Business seems to be mostly cross-linked articles. Premium and cross-linked
|
# Pick your poison. Business seems to be mostly cross-linked articles. Premium and cross-linked
|
||||||
# articels will be dropped.
|
# articels will be dropped.
|
||||||
feeds = [
|
feeds = [
|
||||||
('Breaking News', 'http://www.ajc.com/list/rss/online/ajc-auto-list-iphone-topnews/aFKq/'),
|
('Breaking News', 'http://www.ajc.com/list/rss/online/ajc-auto-list-iphone-topnews/aFKq/'),
|
||||||
('Metro and Georgia', 'http://www.ajc.com/list/rss/news/local/news-georgia-and-region/aCxP/'),
|
('Metro and Georgia',
|
||||||
('Business', 'http://www.ajc.com/feeds/categories/business/'),
|
'http://www.ajc.com/list/rss/news/local/news-georgia-and-region/aCxP/'),
|
||||||
('Health', 'http://www.ajc.com/feeds/categories/health/'),
|
('Business', 'http://www.ajc.com/feeds/categories/business/'),
|
||||||
# ('Braves', 'http://www.ajc.com/list/rss/sports/baseball/atlanta-braves-news/aGpN/'),
|
('Health', 'http://www.ajc.com/feeds/categories/health/'),
|
||||||
# ('Falcons', 'http://www.ajc.com/list/rss/sports/football/falcons-news/aGK4/'),
|
# ('Braves', 'http://www.ajc.com/list/rss/sports/baseball/atlanta-braves-news/aGpN/'),
|
||||||
# ('Georgia Tech Yellow Jackets', 'http://www.ajc.com/list/rss/sports/college/georgia-tech-headlines/aGK6/'),
|
# ('Falcons', 'http://www.ajc.com/list/rss/sports/football/falcons-news/aGK4/'),
|
||||||
]
|
# ('Georgia Tech Yellow Jackets', 'http://www.ajc.com/list/rss/sports/college/georgia-tech-headlines/aGK6/'),
|
||||||
|
]
|
||||||
|
|
||||||
headline_reg_exp = '^.*cm-story-headline.*$'
|
headline_reg_exp = '^.*cm-story-headline.*$'
|
||||||
story_body_reg_exp = '^.*cm-story-body.*$'
|
story_body_reg_exp = '^.*cm-story-body.*$'
|
||||||
author_reg_exp = '^.*cm-story-author.*$'
|
author_reg_exp = '^.*cm-story-author.*$'
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'class':re.compile(headline_reg_exp, re.IGNORECASE)}),
|
dict(name='div', attrs={'class': re.compile(
|
||||||
dict(name='div', attrs={'class':'cm-story-meta'}),
|
headline_reg_exp, re.IGNORECASE)}),
|
||||||
dict(name='div', attrs={'class':re.compile(author_reg_exp, re.IGNORECASE)}),
|
dict(name='div', attrs={'class': 'cm-story-meta'}),
|
||||||
dict(name='meta', attrs={'name':'description'}),
|
dict(name='div', attrs={'class': re.compile(
|
||||||
dict(name='div', attrs={'class':re.compile(story_body_reg_exp, re.IGNORECASE)}),
|
author_reg_exp, re.IGNORECASE)}),
|
||||||
]
|
dict(name='meta', attrs={'name': 'description'}),
|
||||||
|
dict(name='div', attrs={'class': re.compile(
|
||||||
|
story_body_reg_exp, re.IGNORECASE)}),
|
||||||
|
]
|
||||||
|
|
||||||
premium_reg_exp = '^.*cmPremiumContent.*$'
|
premium_reg_exp = '^.*cmPremiumContent.*$'
|
||||||
footer_reg_exp = '^.*cm-story-footer.*$'
|
footer_reg_exp = '^.*cm-story-footer.*$'
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'class':re.compile(footer_reg_exp, re.IGNORECASE)}),
|
dict(name='div', attrs={'class': re.compile(
|
||||||
dict(name='div', attrs={'class':'cm-inline-related-group'})
|
footer_reg_exp, re.IGNORECASE)}),
|
||||||
]
|
dict(name='div', attrs={'class': 'cm-inline-related-group'})
|
||||||
|
]
|
||||||
|
|
||||||
extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \
|
extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \
|
||||||
.cm-story-headline h1 { text-align: center; font-size: 175%; font-weight: bold; } \
|
.cm-story-headline h1 { text-align: center; font-size: 175%; font-weight: bold; } \
|
||||||
.cm-story-meta { font-size: 80%; } \
|
.cm-story-meta { font-size: 80%; } \
|
||||||
.cm-related-caption, .cmPhotoImageAttribution, img { display: block; font-size: 75%; font-style: italic; text-align: center; margin: 5px auto;} \
|
.cm-related-caption, .cmPhotoImageAttribution, img { display: block; font-size: 75%; font-style: italic; text-align: center; margin: 5px auto;} \
|
||||||
.cm-story-author { display: block; font-size: 80%; font-style: italic; }'
|
.cm-story-author { display: block; font-size: 80%; font-style: italic; }'
|
||||||
|
|
||||||
# I would love to remove these completely from the finished product, but I can't see how at the momemnt.
|
# I would love to remove these completely from the finished product, but I can't see how at the momemnt.
|
||||||
# Retuning "None" from preprocess_html(soup) as suggested in mobileread forums leads to errors.
|
# Retuning "None" from preprocess_html(soup) as suggested in mobileread
|
||||||
|
# forums leads to errors.
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
premium = soup.find('div', attrs={'class':re.compile(self.premium_reg_exp, re.IGNORECASE)})
|
premium = soup.find('div', attrs={'class': re.compile(
|
||||||
|
self.premium_reg_exp, re.IGNORECASE)})
|
||||||
if premium:
|
if premium:
|
||||||
return None
|
return None
|
||||||
crosslink = soup.find('a', attrs={'class':'cm-feed-story-more-link'})
|
crosslink = soup.find('a', attrs={'class': 'cm-feed-story-more-link'})
|
||||||
if crosslink:
|
if crosslink:
|
||||||
return None
|
return None
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def populate_article_metadata(self, article, soup, first):
|
def populate_article_metadata(self, article, soup, first):
|
||||||
for meta in soup.findAll('meta', attrs={'name':'description'}):
|
for meta in soup.findAll('meta', attrs={'name': 'description'}):
|
||||||
article.text_summary = meta['content']
|
article.text_summary = meta['content']
|
||||||
article.summary = meta['content']
|
article.summary = meta['content']
|
||||||
|
|
||||||
lead = soup.find('div', attrs={'class':'cm-story-photo'})
|
lead = soup.find('div', attrs={'class': 'cm-story-photo'})
|
||||||
if lead:
|
if lead:
|
||||||
lead = lead.find('img')
|
lead = lead.find('img')
|
||||||
else:
|
else:
|
||||||
@ -98,10 +109,10 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
|
|||||||
self.add_toc_thumbnail(article, lead['src'])
|
self.add_toc_thumbnail(article, lead['src'])
|
||||||
names = ''
|
names = ''
|
||||||
comma = ''
|
comma = ''
|
||||||
for div in soup.findAll('div', attrs={'class':re.compile(self.author_reg_exp, re.IGNORECASE)}):
|
for div in soup.findAll('div', attrs={'class': re.compile(self.author_reg_exp, re.IGNORECASE)}):
|
||||||
div.extract()
|
div.extract()
|
||||||
for auth in div.findAll('a'):
|
for auth in div.findAll('a'):
|
||||||
if (auth.has_key('class') and auth['class'] == 'cm-source-image'):
|
if (auth.has_key('class') and auth['class'] == 'cm-source-image'): # noqa
|
||||||
continue
|
continue
|
||||||
names = names + comma + auth.contents[0]
|
names = names + comma + auth.contents[0]
|
||||||
comma = ', '
|
comma = ', '
|
||||||
@ -110,7 +121,6 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
|
|||||||
tag = Tag(soup, 'div', [('class', 'cm-story-author')])
|
tag = Tag(soup, 'div', [('class', 'cm-story-author')])
|
||||||
tag.append("by: ")
|
tag.append("by: ")
|
||||||
tag.append(names)
|
tag.append(names)
|
||||||
meta = soup.find('div', attrs={'class':'cm-story-meta'})
|
meta = soup.find('div', attrs={'class': 'cm-story-meta'})
|
||||||
meta_idx = meta.parent.contents.index(meta)
|
meta_idx = meta.parent.contents.index(meta)
|
||||||
meta.parent.insert(meta_idx + 1, tag)
|
meta.parent.insert(meta_idx + 1, tag)
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||||
'''
|
'''
|
||||||
ajiajin.com/blog
|
ajiajin.com/blog
|
||||||
@ -6,18 +6,17 @@ ajiajin.com/blog
|
|||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class AjiajinBlog(BasicNewsRecipe):
|
class AjiajinBlog(BasicNewsRecipe):
|
||||||
title = u'Ajiajin blog'
|
title = u'Ajiajin blog'
|
||||||
__author__ = 'Hiroshi Miura'
|
__author__ = 'Hiroshi Miura'
|
||||||
oldest_article = 5
|
oldest_article = 5
|
||||||
publication_type = 'blog'
|
publication_type = 'blog'
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
description = 'The next generation internet trends in Japan and Asia'
|
description = 'The next generation internet trends in Japan and Asia'
|
||||||
publisher = ''
|
publisher = ''
|
||||||
category = 'internet, asia, japan'
|
category = 'internet, asia, japan'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
|
|
||||||
feeds = [(u'blog', u'http://feeds.feedburner.com/Asiajin')]
|
|
||||||
|
|
||||||
|
|
||||||
|
feeds = [(u'blog', u'http://feeds.feedburner.com/Asiajin')]
|
||||||
|
@ -2,46 +2,51 @@
|
|||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class Aksiyon (BasicNewsRecipe):
|
class Aksiyon (BasicNewsRecipe):
|
||||||
|
|
||||||
title = u'Aksiyon Dergisi'
|
title = u'Aksiyon Dergisi'
|
||||||
__author__ = u'thomass'
|
__author__ = u'thomass'
|
||||||
description = 'Haftalık haber dergisi '
|
description = 'Haftalık haber dergisi '
|
||||||
oldest_article =13
|
oldest_article = 13
|
||||||
max_articles_per_feed =100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
#delay = 1
|
encoding = 'utf-8'
|
||||||
#use_embedded_content = False
|
publisher = 'Aksiyon'
|
||||||
encoding = 'utf-8'
|
category = 'news, haberler,TR,gazete'
|
||||||
publisher = 'Aksiyon'
|
language = 'tr'
|
||||||
category = 'news, haberler,TR,gazete'
|
|
||||||
language = 'tr'
|
|
||||||
publication_type = 'magazine'
|
publication_type = 'magazine'
|
||||||
auto_cleanup = True
|
auto_cleanup = True
|
||||||
cover_img_url = 'http://www.aksiyon.com.tr/aksiyon/images/aksiyon/top-page/aksiyon_top_r2_c1.jpg'
|
cover_img_url = 'http://www.aksiyon.com.tr/aksiyon/images/aksiyon/top-page/aksiyon_top_r2_c1.jpg'
|
||||||
masthead_url = 'http://aksiyon.com.tr/aksiyon/images/aksiyon/top-page/aksiyon_top_r2_c1.jpg'
|
masthead_url = 'http://aksiyon.com.tr/aksiyon/images/aksiyon/top-page/aksiyon_top_r2_c1.jpg'
|
||||||
ignore_duplicate_articles = { 'title', 'url' }
|
ignore_duplicate_articles = {'title', 'url'}
|
||||||
remove_empty_feeds= True
|
remove_empty_feeds = True
|
||||||
feeds = [
|
feeds = [
|
||||||
( u'KAPAK', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=26'),
|
(u'KAPAK', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=26'),
|
||||||
( u'ANASAYFA', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=0'),
|
(u'ANASAYFA', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=0'),
|
||||||
( u'EKONOMİ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=35'),
|
(u'EKONOMİ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=35'),
|
||||||
( u'EKOANALİZ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=284'),
|
(u'EKOANALİZ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=284'),
|
||||||
( u'YAZARLAR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=17'),
|
(u'YAZARLAR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=17'),
|
||||||
( u'KİTAPLIK', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=13'),
|
(u'KİTAPLIK', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=13'),
|
||||||
( u'SİNEMA', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=14'),
|
(u'SİNEMA', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=14'),
|
||||||
( u'ARKA PENCERE', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=27'),
|
(u'ARKA PENCERE',
|
||||||
( u'DÜNYA', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=32'),
|
u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=27'),
|
||||||
( u'DOSYALAR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=34'),
|
(u'DÜNYA', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=32'),
|
||||||
( u'KARAKUTU', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=11'),
|
(u'DOSYALAR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=34'),
|
||||||
( u'KÜLTÜR & SANAT', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=12'),
|
(u'KARAKUTU', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=11'),
|
||||||
( u'SPOR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=38'),
|
(u'KÜLTÜR & SANAT',
|
||||||
( u'BİLİŞİM - TEKNOLOJİ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=39'),
|
u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=12'),
|
||||||
( u'3. BOYUT', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=172'),
|
(u'SPOR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=38'),
|
||||||
( u'HAYAT BİLGİSİ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=283'),
|
(u'BİLİŞİM - TEKNOLOJİ',
|
||||||
( u'İŞ DÜNYASI', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=283'),
|
u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=39'),
|
||||||
]
|
(u'3. BOYUT', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=172'),
|
||||||
|
(u'HAYAT BİLGİSİ',
|
||||||
#def print_version(self, url):
|
u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=283'),
|
||||||
#return url.replace('http://www.aksiyon.com.tr/aksiyon/newsDetail_getNewsById.action?load=detay&', 'http://www.aksiyon.com.tr/aksiyon/mobile_detailn.action?')
|
(u'İŞ DÜNYASI',
|
||||||
|
u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=283'),
|
||||||
|
]
|
||||||
|
|
||||||
|
# def print_version(self, url):
|
||||||
|
# return
|
||||||
|
# url.replace('http://www.aksiyon.com.tr/aksiyon/newsDetail_getNewsById.action?load=detay&',
|
||||||
|
# 'http://www.aksiyon.com.tr/aksiyon/mobile_detailn.action?')
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2010-2012, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2010-2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
akter.co.rs
|
akter.co.rs
|
||||||
@ -7,37 +7,35 @@ akter.co.rs
|
|||||||
import re
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class Akter(BasicNewsRecipe):
|
class Akter(BasicNewsRecipe):
|
||||||
title = 'AKTER - Nedeljnik'
|
title = 'AKTER - Nedeljnik'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'AKTER - nedeljni politicki magazin savremene Srbije'
|
description = 'AKTER - nedeljni politicki magazin savremene Srbije'
|
||||||
publisher = 'Akter Media Group d.o.o.'
|
publisher = 'Akter Media Group d.o.o.'
|
||||||
category = 'vesti, online vesti, najnovije vesti, politika, sport, ekonomija, biznis, finansije, berza, kultura, zivot, putovanja, auto, automobili, tehnologija, politicki magazin, dogadjaji, desavanja, lifestyle, zdravlje, zdravstvo, vest, novine, nedeljnik, srbija, novi sad, vojvodina, svet, drustvo, zabava, republika srpska, beograd, intervju, komentar, reportaza, arhiva vesti, news, serbia, politics'
|
category = 'vesti, online vesti, najnovije vesti, politika, sport, ekonomija, biznis, finansije, berza, kultura, zivot, putovanja, auto, automobili, tehnologija, politicki magazin, dogadjaji, desavanja, lifestyle, zdravlje, zdravstvo, vest, novine, nedeljnik, srbija, novi sad, vojvodina, svet, drustvo, zabava, republika srpska, beograd, intervju, komentar, reportaza, arhiva vesti, news, serbia, politics' # noqa
|
||||||
oldest_article = 8
|
oldest_article = 8
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
masthead_url = 'http://www.akter.co.rs/gfx/logoneover.png'
|
masthead_url = 'http://www.akter.co.rs/gfx/logoneover.png'
|
||||||
language = 'sr'
|
language = 'sr'
|
||||||
publication_type = 'magazine'
|
publication_type = 'magazine'
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
extra_css = """
|
extra_css = """
|
||||||
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
|
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
|
||||||
body{font-family: Tahoma,Geneva,sans1,sans-serif}
|
body{font-family: Tahoma,Geneva,sans1,sans-serif}
|
||||||
img{margin-bottom: 0.8em; display: block;}
|
img{margin-bottom: 0.8em; display: block;}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||||
, 'tags' : category
|
}
|
||||||
, 'publisher': publisher
|
|
||||||
, 'language' : language
|
|
||||||
}
|
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'section_to_print'})]
|
keep_only_tags = [dict(name='div', attrs={'id': 'section_to_print'})]
|
||||||
feeds = [(u'Nedeljnik', u'http://akter.co.rs/rss/nedeljnik')]
|
feeds = [(u'Nedeljnik', u'http://akter.co.rs/rss/nedeljnik')]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
dpart, spart, apart = url.rpartition('/')
|
dpart, spart, apart = url.rpartition('/')
|
||||||
@ -45,10 +43,9 @@ class Akter(BasicNewsRecipe):
|
|||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
soup = self.index_to_soup('http://www.akter.co.rs/weekly.html')
|
soup = self.index_to_soup('http://www.akter.co.rs/weekly.html')
|
||||||
divt = soup.find('div', attrs={'class':'lastissue'})
|
divt = soup.find('div', attrs={'class': 'lastissue'})
|
||||||
if divt:
|
if divt:
|
||||||
imgt = divt.find('img')
|
imgt = divt.find('img')
|
||||||
if imgt:
|
if imgt:
|
||||||
return 'http://www.akter.co.rs' + imgt['src']
|
return 'http://www.akter.co.rs' + imgt['src']
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
akter.co.rs
|
akter.co.rs
|
||||||
@ -7,37 +7,34 @@ akter.co.rs
|
|||||||
import re
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class Akter(BasicNewsRecipe):
|
class Akter(BasicNewsRecipe):
|
||||||
title = 'AKTER - Dnevnik'
|
title = 'AKTER - Dnevnik'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'AKTER - Najnovije vesti iz Srbije'
|
description = 'AKTER - Najnovije vesti iz Srbije'
|
||||||
publisher = 'Akter Media Group d.o.o.'
|
publisher = 'Akter Media Group d.o.o.'
|
||||||
category = 'vesti, online vesti, najnovije vesti, politika, sport, ekonomija, biznis, finansije, berza, kultura, zivot, putovanja, auto, automobili, tehnologija, politicki magazin, dogadjaji, desavanja, lifestyle, zdravlje, zdravstvo, vest, novine, nedeljnik, srbija, novi sad, vojvodina, svet, drustvo, zabava, republika srpska, beograd, intervju, komentar, reportaza, arhiva vesti, news, serbia, politics'
|
oldest_article = 8
|
||||||
oldest_article = 8
|
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
masthead_url = 'http://www.akter.co.rs/gfx/logodnover.png'
|
masthead_url = 'http://www.akter.co.rs/gfx/logodnover.png'
|
||||||
language = 'sr'
|
language = 'sr'
|
||||||
publication_type = 'magazine'
|
publication_type = 'magazine'
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
extra_css = """
|
extra_css = """
|
||||||
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
|
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
|
||||||
body{font-family: Tahoma,Geneva,sans1,sans-serif}
|
body{font-family: Tahoma,Geneva,sans1,sans-serif}
|
||||||
img{margin-bottom: 0.8em; display: block;}
|
img{margin-bottom: 0.8em; display: block;}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment': description, 'publisher': publisher, 'language': language
|
||||||
, 'tags' : category
|
}
|
||||||
, 'publisher': publisher
|
|
||||||
, 'language' : language
|
|
||||||
}
|
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'section_to_print'})]
|
keep_only_tags = [dict(name='div', attrs={'id': 'section_to_print'})]
|
||||||
feeds = [(u'Vesti', u'http://akter.co.rs/rss/dnevni')]
|
feeds = [(u'Vesti', u'http://akter.co.rs/rss/dnevni')]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
dpart, spart, apart = url.rpartition('/')
|
dpart, spart, apart = url.rpartition('/')
|
||||||
|
@ -3,8 +3,9 @@ from __future__ import unicode_literals
|
|||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
|
||||||
class aktualneRecipe(BasicNewsRecipe):
|
class aktualneRecipe(BasicNewsRecipe):
|
||||||
__author__ = 'bubak'
|
__author__ = 'bubak'
|
||||||
title = u'aktualne.cz'
|
title = u'aktualne.cz'
|
||||||
publisher = u'Centrum holdings'
|
publisher = u'Centrum holdings'
|
||||||
description = 'aktuálně.cz'
|
description = 'aktuálně.cz'
|
||||||
@ -13,13 +14,13 @@ class aktualneRecipe(BasicNewsRecipe):
|
|||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Domácí', u'http://aktualne.centrum.cz/feeds/rss/domaci/?photo=0'),
|
(u'Domácí', u'http://aktualne.centrum.cz/feeds/rss/domaci/?photo=0'),
|
||||||
(u'Zprávy', u'http://aktualne.centrum.cz/feeds/rss/zpravy/?photo=0'),
|
(u'Zprávy', u'http://aktualne.centrum.cz/feeds/rss/zpravy/?photo=0'),
|
||||||
(u'Praha', u'http://aktualne.centrum.cz/feeds/rss/domaci/regiony/praha/?photo=0'),
|
(u'Praha', u'http://aktualne.centrum.cz/feeds/rss/domaci/regiony/praha/?photo=0'),
|
||||||
(u'Ekonomika', u'http://aktualne.centrum.cz/feeds/rss/ekonomika/?photo=0'),
|
(u'Ekonomika', u'http://aktualne.centrum.cz/feeds/rss/ekonomika/?photo=0'),
|
||||||
(u'Finance', u'http://aktualne.centrum.cz/feeds/rss/finance/?photo=0'),
|
(u'Finance', u'http://aktualne.centrum.cz/feeds/rss/finance/?photo=0'),
|
||||||
(u'Blogy a názory', u'http://blog.aktualne.centrum.cz/export-all.php')
|
(u'Blogy a názory', u'http://blog.aktualne.centrum.cz/export-all.php')
|
||||||
]
|
]
|
||||||
|
|
||||||
language = 'cs'
|
language = 'cs'
|
||||||
cover_url = 'http://img.aktualne.centrum.cz/design/akt4/o/l/logo-akt-ciste.png'
|
cover_url = 'http://img.aktualne.centrum.cz/design/akt4/o/l/logo-akt-ciste.png'
|
||||||
@ -27,29 +28,31 @@ class aktualneRecipe(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
|
||||||
remove_attributes = []
|
remove_attributes = []
|
||||||
remove_tags_before = dict(name='h1', attrs={'class':['titulek-clanku']})
|
remove_tags_before = dict(name='h1', attrs={'class': ['titulek-clanku']})
|
||||||
filter_regexps = [r'img.aktualne.centrum.cz']
|
filter_regexps = [r'img.aktualne.centrum.cz']
|
||||||
remove_tags = [dict(name='div', attrs={'id':['social-bookmark']}),
|
remove_tags = [dict(name='div', attrs={'id': ['social-bookmark']}),
|
||||||
dict(name='div', attrs={'class':['box1', 'svazane-tagy']}),
|
dict(name='div', attrs={'class': ['box1', 'svazane-tagy']}),
|
||||||
dict(name='div', attrs={'class':'itemcomment id0'}),
|
dict(name='div', attrs={'class': 'itemcomment id0'}),
|
||||||
dict(name='div', attrs={'class':'hlavicka'}),
|
dict(name='div', attrs={'class': 'hlavicka'}),
|
||||||
dict(name='div', attrs={'class':'hlavni-menu'}),
|
dict(name='div', attrs={'class': 'hlavni-menu'}),
|
||||||
dict(name='div', attrs={'class':'top-standard-brand-obal'}),
|
dict(name='div', attrs={
|
||||||
dict(name='div', attrs={'class':'breadcrumb'}),
|
'class': 'top-standard-brand-obal'}),
|
||||||
dict(name='div', attrs={'id':'start-standard'}),
|
dict(name='div', attrs={'class': 'breadcrumb'}),
|
||||||
dict(name='div', attrs={'id':'forum'}),
|
dict(name='div', attrs={'id': 'start-standard'}),
|
||||||
dict(name='span', attrs={'class':'akce'}),
|
dict(name='div', attrs={'id': 'forum'}),
|
||||||
dict(name='span', attrs={'class':'odrazka vetsi'}),
|
dict(name='span', attrs={'class': 'akce'}),
|
||||||
dict(name='div', attrs={'class':'boxP'}),
|
dict(name='span', attrs={'class': 'odrazka vetsi'}),
|
||||||
dict(name='div', attrs={'class':'box2'})]
|
dict(name='div', attrs={'class': 'boxP'}),
|
||||||
|
dict(name='div', attrs={'class': 'box2'})]
|
||||||
preprocess_regexps = [
|
preprocess_regexps = [
|
||||||
(re.compile(r'<div class="(contenttitle"|socialni-site|wiki|facebook-promo|facebook-like-button"|meta-akce).*',
|
(re.compile(r'<div class="(contenttitle"|socialni-site|wiki|facebook-promo|facebook-like-button"|meta-akce).*',
|
||||||
re.DOTALL|re.IGNORECASE), lambda match: '</body>'),
|
re.DOTALL | re.IGNORECASE), lambda match: '</body>'),
|
||||||
(re.compile(r'<div class="[^"]*poutak-clanek-trojka".*', re.DOTALL|re.IGNORECASE), lambda match: '</body>')]
|
(re.compile(r'<div class="[^"]*poutak-clanek-trojka".*', re.DOTALL | re.IGNORECASE), lambda match: '</body>')]
|
||||||
|
|
||||||
keep_only_tags = []
|
keep_only_tags = []
|
||||||
|
|
||||||
visited_urls = {}
|
visited_urls = {}
|
||||||
|
|
||||||
def get_article_url(self, article):
|
def get_article_url(self, article):
|
||||||
url = BasicNewsRecipe.get_article_url(self, article)
|
url = BasicNewsRecipe.get_article_url(self, article)
|
||||||
if url in self.visited_urls:
|
if url in self.visited_urls:
|
||||||
|
@ -1,66 +1,76 @@
|
|||||||
# coding=utf-8
|
# coding=utf-8
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2011-2016, Hassan Williamson <haz at hazrpg.co.uk>'
|
__copyright__ = '2011-2016, Hassan Williamson <haz at hazrpg.co.uk>'
|
||||||
'''
|
'''
|
||||||
ahram.org.eg
|
ahram.org.eg
|
||||||
'''
|
'''
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class AlAhram(BasicNewsRecipe):
|
class AlAhram(BasicNewsRecipe):
|
||||||
title = u'Al-Ahram (الأهرام)'
|
title = u'Al-Ahram (الأهرام)'
|
||||||
__author__ = 'Hassan Williamson'
|
__author__ = 'Hassan Williamson'
|
||||||
description = 'The Arabic version of the Al-Ahram newspaper.'
|
description = 'The Arabic version of the Al-Ahram newspaper.'
|
||||||
language = 'ar'
|
language = 'ar'
|
||||||
encoding = 'utf8'
|
encoding = 'utf8'
|
||||||
cover_url = 'http://www.ahram.org.eg/Media/News/2015/3/14/2015-635619650946000713-600.jpg'
|
cover_url = 'http://www.ahram.org.eg/Media/News/2015/3/14/2015-635619650946000713-600.jpg'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
#delay = 1
|
use_embedded_content = False
|
||||||
use_embedded_content = False
|
publisher = 'Al-Ahram'
|
||||||
publisher = 'Al-Ahram'
|
category = 'News'
|
||||||
category = 'News'
|
publication_type = 'newsportal'
|
||||||
publication_type = 'newsportal'
|
|
||||||
|
|
||||||
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif; direction: rtl; } .bbtitle{ font-weight: bold; font-size: 2em; } .bbsubtitle{ font-size: 1.3em; } #WriterImage{ height: 10px; } '
|
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif; direction: rtl; } .bbtitle{ font-weight: bold; font-size: 2em; } .bbsubtitle{ font-size: 1.3em; } #WriterImage{ height: 10px; } ' # noqa
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'class':['bbcolright']})
|
dict(name='div', attrs={'class': ['bbcolright']})
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'class':['bbnav', 'bbsp']}),
|
dict(name='div', attrs={'class': ['bbnav', 'bbsp']}),
|
||||||
dict(name='div', attrs={'id':['AddThisButton']}),
|
dict(name='div', attrs={'id': ['AddThisButton']}),
|
||||||
dict(name='a', attrs={'class':['twitter-share-button']}),
|
dict(name='a', attrs={'class': ['twitter-share-button']}),
|
||||||
dict(name='div', attrs={'id':['ReaderCount']}),
|
dict(name='div', attrs={'id': ['ReaderCount']}),
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_attributes = [
|
remove_attributes = [
|
||||||
'width','height','style'
|
'width', 'height', 'style'
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'الأولى', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=25'),
|
(u'الأولى', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=25'),
|
||||||
(u'الصفحة الثانية', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=74'),
|
(u'الصفحة الثانية',
|
||||||
(u'مصر', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=27'),
|
'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=74'),
|
||||||
(u'المشهد السياسي', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=60'),
|
(u'مصر', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=27'),
|
||||||
(u'المحافظات', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=29'),
|
(u'المشهد السياسي',
|
||||||
(u'الوطن العربي', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=31'),
|
'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=60'),
|
||||||
(u'العالم', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=26'),
|
(u'المحافظات', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=29'),
|
||||||
(u'تقارير المراسلين', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=2'),
|
(u'الوطن العربي',
|
||||||
(u'تحقيقات', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=3'),
|
'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=31'),
|
||||||
(u'قضايا واراء', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=4'),
|
(u'العالم', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=26'),
|
||||||
(u'اقتصاد', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=5'),
|
(u'تقارير المراسلين',
|
||||||
(u'رياضة', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=6'),
|
'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=2'),
|
||||||
(u'حوادث', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=38'),
|
(u'تحقيقات', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=3'),
|
||||||
(u'دنيا الثقافة', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=7'),
|
(u'قضايا واراء',
|
||||||
(u'المراة والطفل', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=8'),
|
'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=4'),
|
||||||
(u'يوم جديد', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=9'),
|
(u'اقتصاد', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=5'),
|
||||||
(u'الكتاب', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=10'),
|
(u'رياضة', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=6'),
|
||||||
(u'الاعمدة', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=11'),
|
(u'حوادث', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=38'),
|
||||||
(u'أراء حرة', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=59'),
|
(u'دنيا الثقافة',
|
||||||
(u'ملفات الاهرام', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=12'),
|
'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=7'),
|
||||||
(u'بريد الاهرام', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=15'),
|
(u'المراة والطفل',
|
||||||
(u'برلمان الثورة', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=61'),
|
'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=8'),
|
||||||
(u'الاخيرة', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=16'),
|
(u'يوم جديد', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=9'),
|
||||||
]
|
(u'الكتاب', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=10'),
|
||||||
|
(u'الاعمدة', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=11'),
|
||||||
|
(u'أراء حرة', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=59'),
|
||||||
|
(u'ملفات الاهرام',
|
||||||
|
'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=12'),
|
||||||
|
(u'بريد الاهرام',
|
||||||
|
'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=15'),
|
||||||
|
(u'برلمان الثورة',
|
||||||
|
'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=61'),
|
||||||
|
(u'الاخيرة', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=16'),
|
||||||
|
]
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
|
||||||
'''
|
'''
|
||||||
@ -6,57 +6,62 @@ english.aljazeera.net
|
|||||||
'''
|
'''
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
def has_cls(x):
|
def has_cls(x):
|
||||||
return dict(attrs={'class':lambda cls: cls and x in cls.split()})
|
return dict(attrs={'class': lambda cls: cls and x in cls.split()})
|
||||||
|
|
||||||
|
|
||||||
class AlJazeera(BasicNewsRecipe):
|
class AlJazeera(BasicNewsRecipe):
|
||||||
title = 'Al Jazeera in English'
|
title = 'Al Jazeera in English'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'News from Middle East'
|
description = 'News from Middle East'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
publisher = 'Al Jazeera'
|
publisher = 'Al Jazeera'
|
||||||
category = 'news, politics, middle east'
|
category = 'news, politics, middle east'
|
||||||
delay = 1
|
delay = 1
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
extra_css = """
|
extra_css = """
|
||||||
body{font-family: Arial,sans-serif}
|
body{font-family: Arial,sans-serif}
|
||||||
#ctl00_cphBody_dvSummary{font-weight: bold}
|
#ctl00_cphBody_dvSummary{font-weight: bold}
|
||||||
#dvArticleDate{font-size: small; color: #999999}
|
#dvArticleDate{font-size: small; color: #999999}
|
||||||
"""
|
"""
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description , 'tags' : category ,
|
'comment': description, 'tags': category,
|
||||||
'publisher' : publisher , 'language' : language
|
'publisher': publisher, 'language': language
|
||||||
}
|
}
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(id='main-story'),
|
dict(id='main-story'),
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
has_cls('MoreOnTheStory'), has_cls('ArticleBottomToolbar'), dict(smtitle="ShowMore"),
|
has_cls('MoreOnTheStory'), has_cls(
|
||||||
dict(name=['object','link','table','meta','base','iframe','embed']),
|
'ArticleBottomToolbar'), dict(smtitle="ShowMore"),
|
||||||
|
dict(name=['object', 'link', 'table',
|
||||||
|
'meta', 'base', 'iframe', 'embed']),
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [(u'Al Jazeera English', u'http://english.aljazeera.net/Services/Rss/?PostingId=2007731105943979989')]
|
feeds = [(u'Al Jazeera English',
|
||||||
|
u'http://english.aljazeera.net/Services/Rss/?PostingId=2007731105943979989')]
|
||||||
|
|
||||||
def get_article_url(self, article):
|
def get_article_url(self, article):
|
||||||
artlurl = article.get('link', None)
|
artlurl = article.get('link', None)
|
||||||
return artlurl.replace('http://english.aljazeera.net//','http://english.aljazeera.net/')
|
return artlurl.replace('http://english.aljazeera.net//', 'http://english.aljazeera.net/')
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
for item in soup.findAll(face=True):
|
for item in soup.findAll(face=True):
|
||||||
del item['face']
|
del item['face']
|
||||||
td = soup.find('td',attrs={'class':'DetailedSummary'})
|
td = soup.find('td', attrs={'class': 'DetailedSummary'})
|
||||||
if td:
|
if td:
|
||||||
td.name = 'div'
|
td.name = 'div'
|
||||||
spn = soup.find('span',attrs={'id':'DetailedTitle'})
|
spn = soup.find('span', attrs={'id': 'DetailedTitle'})
|
||||||
if spn:
|
if spn:
|
||||||
spn.name='h1'
|
spn.name = 'h1'
|
||||||
for itm in soup.findAll('span', attrs={'id':['dvArticleDate','ctl00_cphBody_lblDate']}):
|
for itm in soup.findAll('span', attrs={'id': ['dvArticleDate', 'ctl00_cphBody_lblDate']}):
|
||||||
itm.name = 'div'
|
itm.name = 'div'
|
||||||
for alink in soup.findAll('a'):
|
for alink in soup.findAll('a'):
|
||||||
if alink.string is not None:
|
if alink.string is not None:
|
||||||
|
@ -1,79 +1,85 @@
|
|||||||
# coding=utf-8
|
# coding=utf-8
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2016, Hassan Williamson <haz at hazrpg.co.uk>'
|
__copyright__ = '2016, Hassan Williamson <haz at hazrpg.co.uk>'
|
||||||
'''
|
'''
|
||||||
almasryalyoum.com
|
almasryalyoum.com
|
||||||
'''
|
'''
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class AlMasryAlyoum(BasicNewsRecipe):
|
class AlMasryAlyoum(BasicNewsRecipe):
|
||||||
title = u'Al-Masry Alyoum (المصري اليوم)'
|
title = u'Al-Masry Alyoum (المصري اليوم)'
|
||||||
__author__ = 'Hassan Williamson'
|
__author__ = 'Hassan Williamson'
|
||||||
description = 'The Arabic version of the Al-Masry Alyoum (Egypt Independent) newspaper.'
|
description = 'The Arabic version of the Al-Masry Alyoum (Egypt Independent) newspaper.'
|
||||||
language = 'ar'
|
language = 'ar'
|
||||||
encoding = 'utf8'
|
encoding = 'utf8'
|
||||||
cover_url = 'http://www.almasryalyoum.com/content/images/header_logo.png'
|
cover_url = 'http://www.almasryalyoum.com/content/images/header_logo.png'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
#delay = 1
|
use_embedded_content = False
|
||||||
use_embedded_content = False
|
publisher = 'Al-Masry Alyoum'
|
||||||
publisher = 'Al-Masry Alyoum'
|
category = 'News'
|
||||||
category = 'News'
|
publication_type = 'newsportal'
|
||||||
publication_type = 'newsportal'
|
|
||||||
|
|
||||||
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif; direction: rtl; } .tit_2{ font-weight: bold; font-size: 2em; } .pinfo{ font-size: 1.3em; } .articleimg img{ max-width: 100%; } .imgauther{ display: block; font-size: 0.7em; } .caption{ font-size: 0.7em; } '
|
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif; direction: rtl; } .tit_2{ font-weight: bold; font-size: 2em; } .pinfo{ font-size: 1.3em; } .articleimg img{ max-width: 100%; } .imgauther{ display: block; font-size: 0.7em; } .caption{ font-size: 0.7em; } ' # noqa
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'class':['article']})
|
dict(name='div', attrs={'class': ['article']})
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'class':['share_buttons_container']}),
|
dict(name='div', attrs={'class': ['share_buttons_container']}),
|
||||||
dict(name='div', attrs={'class':['min_related']}),
|
dict(name='div', attrs={'class': ['min_related']}),
|
||||||
dict(name='div', attrs={'id':['feedback']}),
|
dict(name='div', attrs={'id': ['feedback']}),
|
||||||
dict(name='div', attrs={'class':['news_SMSBox']}),
|
dict(name='div', attrs={'class': ['news_SMSBox']}),
|
||||||
dict(name='div', attrs={'class':['tags']}),
|
dict(name='div', attrs={'class': ['tags']}),
|
||||||
dict(name='div', attrs={'class':['ads', 'y_logo_news']}),
|
dict(name='div', attrs={'class': ['ads', 'y_logo_news']}),
|
||||||
dict(name='div', attrs={'class':['ads']}),
|
dict(name='div', attrs={'class': ['ads']}),
|
||||||
dict(name='div', attrs={'class':['option']}),
|
dict(name='div', attrs={'class': ['option']}),
|
||||||
dict(name='div', attrs={'class':['seealso']}),
|
dict(name='div', attrs={'class': ['seealso']}),
|
||||||
dict(name='div', attrs={'id':['comments']}),
|
dict(name='div', attrs={'id': ['comments']}),
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_attributes = [
|
remove_attributes = [
|
||||||
'width','height','style'
|
'width', 'height', 'style'
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'أخر الأخبار', 'http://www.almasryalyoum.com/rss/RssFeeds'),
|
(u'أخر الأخبار', 'http://www.almasryalyoum.com/rss/RssFeeds'),
|
||||||
(u'الصفحة الرئيسية', 'http://www.almasryalyoum.com/rss/RssFeeds?homePage=true'),
|
(u'الصفحة الرئيسية',
|
||||||
(u'أقلام وآراء', 'http://www.almasryalyoum.com/rss/RssFeeds?typeId=2&homePage=false'),
|
'http://www.almasryalyoum.com/rss/RssFeeds?homePage=true'),
|
||||||
(u'أخبار مصر', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=3'),
|
(u'أقلام وآراء', 'http://www.almasryalyoum.com/rss/RssFeeds?typeId=2&homePage=false'),
|
||||||
(u'رياضة', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=8'),
|
(u'أخبار مصر', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=3'),
|
||||||
(u'اقتصاد', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=4'),
|
(u'رياضة', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=8'),
|
||||||
(u'حوادث', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=7'),
|
(u'اقتصاد', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=4'),
|
||||||
(u'فنون', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=10'),
|
(u'حوادث', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=7'),
|
||||||
(u'منوعاتنون', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=12'),
|
(u'فنون', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=10'),
|
||||||
(u'ثقافة', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=6'),
|
(u'منوعاتنون', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=12'),
|
||||||
(u'علوم وتكنولوجيا', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=9'),
|
(u'ثقافة', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=6'),
|
||||||
(u'تحقيقات وحوارات', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=5'),
|
(u'علوم وتكنولوجيا',
|
||||||
(u'المرأة', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=69'),
|
'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=9'),
|
||||||
(u'رأي', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=2'),
|
(u'تحقيقات وحوارات',
|
||||||
(u'وسط الناس', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=13'),
|
'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=5'),
|
||||||
(u'مركز المصري للدراسات و المعلومات', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=56'),
|
(u'المرأة', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=69'),
|
||||||
(u'مطبخ', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=81'),
|
(u'رأي', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=2'),
|
||||||
(u'برلمان مصر', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=78'),
|
(u'وسط الناس', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=13'),
|
||||||
(u'تقارير', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=54'),
|
(u'مركز المصري للدراسات و المعلومات',
|
||||||
(u'تحليلات', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=60'),
|
'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=56'),
|
||||||
(u'عروض نقدية', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=61'),
|
(u'مطبخ', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=81'),
|
||||||
(u'دراسات', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=62'),
|
(u'برلمان مصر', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=78'),
|
||||||
(u'كتاب المصري اليوم', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=65'),
|
(u'تقارير', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=54'),
|
||||||
(u'فعاليات', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=66'),
|
(u'تحليلات', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=60'),
|
||||||
(u'إسلامي', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=75'),
|
(u'عروض نقدية', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=61'),
|
||||||
(u'مطبخي', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=76'),
|
(u'دراسات', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=62'),
|
||||||
(u'مسلسلاتيطبخي', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=77'),
|
(u'كتاب المصري اليوم',
|
||||||
(u'رمضان زمان', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=82'),
|
'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=65'),
|
||||||
(u'تقارير', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=85'),
|
(u'فعاليات', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=66'),
|
||||||
(u'سيارات', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=86'),
|
(u'إسلامي', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=75'),
|
||||||
]
|
(u'مطبخي', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=76'),
|
||||||
|
(u'مسلسلاتيطبخي',
|
||||||
|
'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=77'),
|
||||||
|
(u'رمضان زمان', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=82'),
|
||||||
|
(u'تقارير', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=85'),
|
||||||
|
(u'سيارات', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=86'),
|
||||||
|
]
|
||||||
|
@ -1,14 +1,18 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2014, spswerling'
|
__copyright__ = '2014, spswerling'
|
||||||
'''
|
'''
|
||||||
http://www.al-monitor.com/
|
http://www.al-monitor.com/
|
||||||
'''
|
'''
|
||||||
import string, inspect, datetime, re
|
import string
|
||||||
|
import inspect
|
||||||
|
import datetime
|
||||||
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
|
|
||||||
|
|
||||||
class AlMonitor(BasicNewsRecipe):
|
class AlMonitor(BasicNewsRecipe):
|
||||||
title = u'Al Monitor'
|
title = u'Al Monitor'
|
||||||
__author__ = u'spswerling'
|
__author__ = u'spswerling'
|
||||||
@ -26,39 +30,39 @@ class AlMonitor(BasicNewsRecipe):
|
|||||||
recursions = 0
|
recursions = 0
|
||||||
compress_news_images = True
|
compress_news_images = True
|
||||||
compress_news_images_max_size = 7
|
compress_news_images_max_size = 7
|
||||||
scale_news_images = (150,200) # (kindle touch: 600x800)
|
scale_news_images = (150, 200) # (kindle touch: 600x800)
|
||||||
useHighResImages = False
|
useHighResImages = False
|
||||||
oldest_article = 1.5
|
oldest_article = 1.5
|
||||||
max_articles_per_section = 15
|
max_articles_per_section = 15
|
||||||
|
|
||||||
sections = [
|
sections = [
|
||||||
(u'egypt',u'http://www.al-monitor.com/pulse/egypt-pulse'),
|
(u'egypt', u'http://www.al-monitor.com/pulse/egypt-pulse'),
|
||||||
(u'gulf',u'http://www.al-monitor.com/pulse/gulf-pulse'),
|
(u'gulf', u'http://www.al-monitor.com/pulse/gulf-pulse'),
|
||||||
(u'iran',u'http://www.al-monitor.com/pulse/iran-pulse'),
|
(u'iran', u'http://www.al-monitor.com/pulse/iran-pulse'),
|
||||||
(u'iraq',u'http://www.al-monitor.com/pulse/iraq-pulse'),
|
(u'iraq', u'http://www.al-monitor.com/pulse/iraq-pulse'),
|
||||||
(u'israel',u'http://www.al-monitor.com/pulse/israel-pulse'),
|
(u'israel', u'http://www.al-monitor.com/pulse/israel-pulse'),
|
||||||
(u'lebanon',u'http://www.al-monitor.com/pulse/lebanon-pulse'),
|
(u'lebanon', u'http://www.al-monitor.com/pulse/lebanon-pulse'),
|
||||||
(u'palistine',u'http://www.al-monitor.com/pulse/palistine-pulse'),
|
(u'palistine', u'http://www.al-monitor.com/pulse/palistine-pulse'),
|
||||||
(u'syria',u'http://www.al-monitor.com/pulse/syria-pulse'),
|
(u'syria', u'http://www.al-monitor.com/pulse/syria-pulse'),
|
||||||
(u'turkey',u'http://www.al-monitor.com/pulse/turkey-pulse'),
|
(u'turkey', u'http://www.al-monitor.com/pulse/turkey-pulse'),
|
||||||
]
|
]
|
||||||
|
|
||||||
# util for creating remove_tags and keep_tags style regex matchers
|
# util for creating remove_tags and keep_tags style regex matchers
|
||||||
def tag_matcher(elt, attr, rgx_str):
|
def tag_matcher(elt, attr, rgx_str):
|
||||||
return dict(name=elt, attrs={attr:re.compile(rgx_str, re.IGNORECASE)})
|
return dict(name=elt, attrs={attr: re.compile(rgx_str, re.IGNORECASE)})
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(attrs={'id':[
|
dict(attrs={'id': [
|
||||||
'header',
|
'header',
|
||||||
'pulsebanner',
|
'pulsebanner',
|
||||||
'relatedarticles',
|
'relatedarticles',
|
||||||
'sidecolumn',
|
'sidecolumn',
|
||||||
'disqus',
|
'disqus',
|
||||||
'footer',
|
'footer',
|
||||||
'footer2',
|
'footer2',
|
||||||
'footer3',
|
'footer3',
|
||||||
'mobile-extras',
|
'mobile-extras',
|
||||||
]}),
|
]}),
|
||||||
tag_matcher('hr', 'id', 'spacer'),
|
tag_matcher('hr', 'id', 'spacer'),
|
||||||
tag_matcher('a', 'title', 'print this article'),
|
tag_matcher('a', 'title', 'print this article'),
|
||||||
tag_matcher('div', 'class', 'extras'),
|
tag_matcher('div', 'class', 'extras'),
|
||||||
@ -118,12 +122,12 @@ class AlMonitor(BasicNewsRecipe):
|
|||||||
if len(self.articles[section]) >= self.max_articles_per_section:
|
if len(self.articles[section]) >= self.max_articles_per_section:
|
||||||
return
|
return
|
||||||
self.articles[section].append(
|
self.articles[section].append(
|
||||||
dict(title=title,
|
dict(title=title,
|
||||||
url=full_url,
|
url=full_url,
|
||||||
date='',
|
date='',
|
||||||
description='',
|
description='',
|
||||||
author='',
|
author='',
|
||||||
content=''))
|
content=''))
|
||||||
|
|
||||||
def preprocess_raw_html(self, raw_html, url):
|
def preprocess_raw_html(self, raw_html, url):
|
||||||
reason_to_skip = self.should_skip_article(BeautifulSoup(raw_html))
|
reason_to_skip = self.should_skip_article(BeautifulSoup(raw_html))
|
||||||
@ -136,7 +140,7 @@ class AlMonitor(BasicNewsRecipe):
|
|||||||
return super(self.__class__, self).preprocess_raw_html(raw_html, url)
|
return super(self.__class__, self).preprocess_raw_html(raw_html, url)
|
||||||
|
|
||||||
def populate_article_metadata(self, article, soup, first):
|
def populate_article_metadata(self, article, soup, first):
|
||||||
summary_node = soup.find('div', {'id':'summary'})
|
summary_node = soup.find('div', {'id': 'summary'})
|
||||||
if summary_node:
|
if summary_node:
|
||||||
summary = self.text(summary_node)
|
summary = self.text(summary_node)
|
||||||
self._p('Summary: ' + summary)
|
self._p('Summary: ' + summary)
|
||||||
@ -167,7 +171,7 @@ class AlMonitor(BasicNewsRecipe):
|
|||||||
def date_from_string(self, datestring):
|
def date_from_string(self, datestring):
|
||||||
try:
|
try:
|
||||||
# eg: Posted September 17, 2014
|
# eg: Posted September 17, 2014
|
||||||
dt = datetime.datetime.strptime(datestring,"Posted %B %d, %Y")
|
dt = datetime.datetime.strptime(datestring, "Posted %B %d, %Y")
|
||||||
except:
|
except:
|
||||||
dt = None
|
dt = None
|
||||||
|
|
||||||
@ -192,14 +196,14 @@ class AlMonitor(BasicNewsRecipe):
|
|||||||
|
|
||||||
return abs_url
|
return abs_url
|
||||||
|
|
||||||
def text(self,n):
|
def text(self, n):
|
||||||
return self.tag_to_string(n).strip()
|
return self.tag_to_string(n).strip()
|
||||||
|
|
||||||
def _dbg_soup_node(self, node):
|
def _dbg_soup_node(self, node):
|
||||||
s = ' cls: ' + str(node.get('class')).strip() + \
|
s = ' cls: ' + str(node.get('class')).strip() + \
|
||||||
' id: ' + str(node.get('id')).strip() + \
|
' id: ' + str(node.get('id')).strip() + \
|
||||||
' role: ' + str(node.get('role')).strip() + \
|
' role: ' + str(node.get('role')).strip() + \
|
||||||
' txt: ' + self.text(node)
|
' txt: ' + self.text(node)
|
||||||
return s
|
return s
|
||||||
|
|
||||||
def _p(self, msg):
|
def _p(self, msg):
|
||||||
|
@ -1,10 +1,11 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2012, Peter Grungi <p dot grungi at gmail dot com>'
|
__copyright__ = '2012, Peter Grungi <p dot grungi at gmail dot com>'
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class AlbertMohlersBlog(BasicNewsRecipe):
|
class AlbertMohlersBlog(BasicNewsRecipe):
|
||||||
title = u'Albert Mohler\'s Blog'
|
title = u'Albert Mohler\'s Blog'
|
||||||
__author__ = 'Peter Grungi'
|
__author__ = 'Peter Grungi'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
oldest_article = 90
|
oldest_article = 90
|
||||||
@ -15,4 +16,5 @@ class AlbertMohlersBlog(BasicNewsRecipe):
|
|||||||
language = 'en'
|
language = 'en'
|
||||||
author = 'Albert Mohler'
|
author = 'Albert Mohler'
|
||||||
|
|
||||||
feeds = [(u'Albert Mohler\'s Blog', u'http://feeds.feedburner.com/AlbertMohlersBlog?format=xml')]
|
feeds = [(u'Albert Mohler\'s Blog',
|
||||||
|
u'http://feeds.feedburner.com/AlbertMohlersBlog?format=xml')]
|
||||||
|
@ -2,16 +2,16 @@ __license__ = 'GPL v3'
|
|||||||
import re
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class AlejaKomiksu(BasicNewsRecipe):
|
class AlejaKomiksu(BasicNewsRecipe):
|
||||||
title = u'Aleja Komiksu'
|
title = u'Aleja Komiksu'
|
||||||
__author__ = 'fenuks'
|
__author__ = 'fenuks'
|
||||||
description = u'Serwis poświęcony komiksom. Najnowsze wieści, recenzje, artykuły, wywiady, galerie, komiksy online, konkursy, linki, baza komiksów online.'
|
description = u'Serwis poświęcony komiksom. Najnowsze wieści, recenzje, artykuły, wywiady, galerie, komiksy online, konkursy, linki, baza komiksów online.'
|
||||||
category = 'comics'
|
category = 'comics'
|
||||||
#publication_type = ''
|
language = 'pl'
|
||||||
language = 'pl'
|
|
||||||
#encoding = ''
|
|
||||||
extra_css = 'ul {list-style-type: none;} .gfx_news {float: right;}'
|
extra_css = 'ul {list-style-type: none;} .gfx_news {float: right;}'
|
||||||
preprocess_regexps = [(re.compile(ur'((<li class="no_img_b">(Do poczytania)|(Nowości):</li>)|(<p class="head2">Komentarze</p>)).*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>')]
|
preprocess_regexps = [(re.compile(ur'((<li class="no_img_b">(Do poczytania)|(Nowości):</li>)|(<p class="head2">Komentarze</p>)).*</body>',
|
||||||
|
re.DOTALL | re.IGNORECASE), lambda match: '</body>')]
|
||||||
cover_url = 'http://www.alejakomiksu.com/gfx/build/logo.png'
|
cover_url = 'http://www.alejakomiksu.com/gfx/build/logo.png'
|
||||||
masthead_url = 'http://www.alejakomiksu.com/gfx/build/logo.png'
|
masthead_url = 'http://www.alejakomiksu.com/gfx/build/logo.png'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
@ -23,15 +23,13 @@ class AlejaKomiksu(BasicNewsRecipe):
|
|||||||
remove_attributes = ['style', 'font']
|
remove_attributes = ['style', 'font']
|
||||||
ignore_duplicate_articles = {'title', 'url'}
|
ignore_duplicate_articles = {'title', 'url'}
|
||||||
|
|
||||||
keep_only_tags = [dict(attrs={'class':'cont_tresc'})]
|
keep_only_tags = [dict(attrs={'class': 'cont_tresc'})]
|
||||||
#remove_tags = [dict()]
|
|
||||||
#remove_tags_before = dict()
|
|
||||||
|
|
||||||
feeds = [(u'Wiadomości', 'http://www.alejakomiksu.com/rss.php5')]
|
feeds = [(u'Wiadomości', 'http://www.alejakomiksu.com/rss.php5')]
|
||||||
|
|
||||||
def skip_ad_pages(self, soup):
|
def skip_ad_pages(self, soup):
|
||||||
tag = soup.find(attrs={'class':'rodzaj'})
|
tag = soup.find(attrs={'class': 'rodzaj'})
|
||||||
if tag and tag.a.string.lower().strip() == 'recenzje':
|
if tag and tag.a.string.lower().strip() == 'recenzje':
|
||||||
link = soup.find(text=re.compile('recenzuje'))
|
link = soup.find(text=re.compile('recenzuje'))
|
||||||
if link:
|
if link:
|
||||||
return self.index_to_soup(link.parent['href'], raw=True)
|
return self.index_to_soup(link.parent['href'], raw=True)
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
|
||||||
'''
|
'''
|
||||||
@ -8,19 +8,20 @@ www.alo.rs
|
|||||||
import re
|
import re
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class Alo_Novine(BasicNewsRecipe):
|
class Alo_Novine(BasicNewsRecipe):
|
||||||
title = 'Alo!'
|
title = 'Alo!'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = "News Portal from Serbia"
|
description = "News Portal from Serbia"
|
||||||
publisher = 'Alo novine d.o.o.'
|
publisher = 'Alo novine d.o.o.'
|
||||||
category = 'news, politics, Serbia'
|
category = 'news, politics, Serbia'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
delay = 4
|
delay = 4
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'sr'
|
language = 'sr'
|
||||||
extra_css = """
|
extra_css = """
|
||||||
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
|
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
|
||||||
.article_description,body{font-family: Arial,Helvetica,sans1,sans-serif}
|
.article_description,body{font-family: Arial,Helvetica,sans1,sans-serif}
|
||||||
@ -30,25 +31,23 @@ class Alo_Novine(BasicNewsRecipe):
|
|||||||
img{margin-bottom: 0.8em} """
|
img{margin-bottom: 0.8em} """
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||||
, 'tags' : category
|
}
|
||||||
, 'publisher': publisher
|
|
||||||
, 'language' : language
|
|
||||||
}
|
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
remove_tags = [dict(name=['object','link','embed'])]
|
remove_tags = [dict(name=['object', 'link', 'embed'])]
|
||||||
remove_attributes = ['height','width']
|
remove_attributes = ['height', 'width']
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Najnovije Vijesti', u'http://www.alo.rs/rss/danasnje_vesti')
|
|
||||||
,(u'Politika' , u'http://www.alo.rs/rss/politika')
|
(u'Najnovije Vijesti', u'http://www.alo.rs/rss/danasnje_vesti'),
|
||||||
,(u'Vesti' , u'http://www.alo.rs/rss/vesti')
|
(u'Politika', u'http://www.alo.rs/rss/politika'),
|
||||||
,(u'Sport' , u'http://www.alo.rs/rss/sport')
|
(u'Vesti', u'http://www.alo.rs/rss/vesti'),
|
||||||
,(u'Ljudi' , u'http://www.alo.rs/rss/ljudi')
|
(u'Sport', u'http://www.alo.rs/rss/sport'),
|
||||||
,(u'Saveti' , u'http://www.alo.rs/rss/saveti')
|
(u'Ljudi', u'http://www.alo.rs/rss/ljudi'),
|
||||||
]
|
(u'Saveti', u'http://www.alo.rs/rss/saveti')
|
||||||
|
]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
@ -61,5 +60,4 @@ class Alo_Novine(BasicNewsRecipe):
|
|||||||
return 'http://www.alo.rs/resources/templates/tools/print.php?id=' + artid
|
return 'http://www.alo.rs/resources/templates/tools/print.php?id=' + artid
|
||||||
|
|
||||||
def image_url_processor(self, baseurl, url):
|
def image_url_processor(self, baseurl, url):
|
||||||
return url.replace('alo.rs//','alo.rs/')
|
return url.replace('alo.rs//', 'alo.rs/')
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2011, Rasmus Lauritsen <rasmus at lauritsen.info>'
|
__copyright__ = '2011, Rasmus Lauritsen <rasmus at lauritsen.info>'
|
||||||
'''
|
'''
|
||||||
aoh.dk
|
aoh.dk
|
||||||
@ -6,38 +6,35 @@ aoh.dk
|
|||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class aoh_dk(BasicNewsRecipe):
|
class aoh_dk(BasicNewsRecipe):
|
||||||
title = 'Alt om Herning'
|
title = 'Alt om Herning'
|
||||||
__author__ = 'Rasmus Lauritsen'
|
__author__ = 'Rasmus Lauritsen'
|
||||||
description = 'Nyheder fra Herning om omegn'
|
description = 'Nyheder fra Herning om omegn'
|
||||||
publisher = 'Mediehuset Herning Folkeblad'
|
publisher = 'Mediehuset Herning Folkeblad'
|
||||||
category = 'news, local, Denmark'
|
category = 'news, local, Denmark'
|
||||||
oldest_article = 14
|
oldest_article = 14
|
||||||
max_articles_per_feed = 50
|
max_articles_per_feed = 50
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
delay = 1
|
delay = 1
|
||||||
encoding = 'utf8'
|
encoding = 'utf8'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'da'
|
language = 'da'
|
||||||
extra_css = """ body{font-family: Verdana,Arial,sans-serif }
|
extra_css = """ body{font-family: Verdana,Arial,sans-serif }
|
||||||
img{margin-bottom: 0.4em}
|
img{margin-bottom: 0.4em}
|
||||||
.txtContent,.stamp{font-size: small}
|
.txtContent,.stamp{font-size: small}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||||
, 'tags' : category
|
}
|
||||||
, 'publisher' : publisher
|
|
||||||
, 'language' : language
|
|
||||||
}
|
|
||||||
|
|
||||||
feeds = [(u'All news', u'http://aoh.dk/rss.xml')]
|
feeds = [(u'All news', u'http://aoh.dk/rss.xml')]
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='h1')
|
dict(name='h1'), dict(name='span', attrs={'class': ['frontpage_body']})
|
||||||
,dict(name='span', attrs={'class':['frontpage_body']})
|
]
|
||||||
]
|
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['object','link'])
|
dict(name=['object', 'link'])
|
||||||
]
|
]
|
||||||
|
@ -1,34 +1,35 @@
|
|||||||
from calibre.ptempfile import PersistentTemporaryFile
|
from calibre.ptempfile import PersistentTemporaryFile
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class Alternet(BasicNewsRecipe):
|
class Alternet(BasicNewsRecipe):
|
||||||
title = u'Alternet'
|
title = u'Alternet'
|
||||||
__author__= 'rty'
|
__author__ = 'rty'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
publisher = 'alternet.org'
|
publisher = 'alternet.org'
|
||||||
category = 'News, Magazine'
|
category = 'News, Magazine'
|
||||||
description = 'News magazine and online community'
|
description = 'News magazine and online community'
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Front Page', u'http://feeds.feedblitz.com/alternet')
|
(u'Front Page', u'http://feeds.feedblitz.com/alternet')
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_attributes = ['width', 'align','cellspacing']
|
remove_attributes = ['width', 'align', 'cellspacing']
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
use_embedded_content = True
|
use_embedded_content = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
language = 'en'
|
language = 'en'
|
||||||
encoding = 'UTF-8'
|
encoding = 'UTF-8'
|
||||||
temp_files = []
|
temp_files = []
|
||||||
articles_are_obfuscated = True
|
articles_are_obfuscated = True
|
||||||
|
|
||||||
def get_article_url(self, article):
|
def get_article_url(self, article):
|
||||||
return article.get('link', None)
|
return article.get('link', None)
|
||||||
|
|
||||||
def get_obfuscated_article(self, url):
|
def get_obfuscated_article(self, url):
|
||||||
br = self.get_browser()
|
br = self.get_browser()
|
||||||
br.open(url)
|
br.open(url)
|
||||||
response = br.follow_link(url_regex = r'/printversion/[0-9]+', nr = 0)
|
response = br.follow_link(url_regex=r'/printversion/[0-9]+', nr=0)
|
||||||
html = response.read()
|
html = response.read()
|
||||||
self.temp_files.append(PersistentTemporaryFile('_fa.html'))
|
self.temp_files.append(PersistentTemporaryFile('_fa.html'))
|
||||||
self.temp_files[-1].write(html)
|
self.temp_files[-1].write(html)
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
# vim:fileencoding=UTF-8
|
# vim:fileencoding=UTF-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2013, Eddie Lau'
|
__copyright__ = '2013, Eddie Lau'
|
||||||
__Date__ = ''
|
__Date__ = ''
|
||||||
|
|
||||||
@ -12,7 +12,9 @@ Change Log:
|
|||||||
|
|
||||||
from calibre import (__appname__, force_unicode, strftime)
|
from calibre import (__appname__, force_unicode, strftime)
|
||||||
from calibre.utils.date import now as nowf
|
from calibre.utils.date import now as nowf
|
||||||
import os, datetime, re
|
import os
|
||||||
|
import datetime
|
||||||
|
import re
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
from contextlib import nested
|
from contextlib import nested
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
@ -21,10 +23,11 @@ from calibre.ebooks.metadata.toc import TOC
|
|||||||
from calibre.ebooks.metadata import MetaInformation
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
from calibre.utils.localization import canonicalize_lang
|
from calibre.utils.localization import canonicalize_lang
|
||||||
|
|
||||||
|
|
||||||
class AppleDaily(BasicNewsRecipe):
|
class AppleDaily(BasicNewsRecipe):
|
||||||
title = u'AM730'
|
title = u'AM730'
|
||||||
__author__ = 'Eddie Lau'
|
__author__ = 'Eddie Lau'
|
||||||
publisher = 'AM730'
|
publisher = 'AM730'
|
||||||
oldest_article = 1
|
oldest_article = 1
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
auto_cleanup = False
|
auto_cleanup = False
|
||||||
@ -35,46 +38,46 @@ class AppleDaily(BasicNewsRecipe):
|
|||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
description = 'http://www.am730.com.hk'
|
description = 'http://www.am730.com.hk'
|
||||||
category = 'Chinese, News, Hong Kong'
|
category = 'Chinese, News, Hong Kong'
|
||||||
masthead_url = 'http://www.am730.com.hk/images/logo.jpg'
|
masthead_url = 'http://www.am730.com.hk/images/logo.jpg'
|
||||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 20px; margin-bottom: 20px; max-height:70%;} div[id=articleHeader] {font-size:200%; text-align:left; font-weight:bold;} li {font-size:50%; margin-left:auto; margin-right:auto;}'
|
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 20px; margin-bottom: 20px; max-height:70%;} div[id=articleHeader] {font-size:200%; text-align:left; font-weight:bold;} li {font-size:50%; margin-left:auto; margin-right:auto;}' # noqa
|
||||||
keep_only_tags = [dict(name='h2', attrs={'class':'printTopic'}),
|
keep_only_tags = [dict(name='h2', attrs={'class': 'printTopic'}),
|
||||||
dict(name='div', attrs={'id':'article_content'}),
|
dict(name='div', attrs={'id': 'article_content'}),
|
||||||
dict(name='div', attrs={'id':'slider'})]
|
dict(name='div', attrs={'id': 'slider'})]
|
||||||
remove_tags = [dict(name='img', attrs={'src':'images/am730_article_logo.jpg'}),
|
remove_tags = [dict(name='img', attrs={'src': 'images/am730_article_logo.jpg'}),
|
||||||
dict(name='img', attrs={'src':'images/am_endmark.gif'})]
|
dict(name='img', attrs={'src': 'images/am_endmark.gif'})]
|
||||||
|
|
||||||
def get_dtlocal(self):
|
def get_dtlocal(self):
|
||||||
dt_utc = datetime.datetime.utcnow()
|
dt_utc = datetime.datetime.utcnow()
|
||||||
# convert UTC to local hk time - at HKT 6am, all news are available
|
# convert UTC to local hk time - at HKT 6am, all news are available
|
||||||
return dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(6.0/24)
|
return dt_utc + datetime.timedelta(8.0 / 24) - datetime.timedelta(6.0 / 24)
|
||||||
|
|
||||||
def get_fetchdate(self):
|
def get_fetchdate(self):
|
||||||
if __Date__ <> '':
|
if __Date__ != '':
|
||||||
return __Date__
|
return __Date__
|
||||||
else:
|
else:
|
||||||
return self.get_dtlocal().strftime("%Y%m%d")
|
return self.get_dtlocal().strftime("%Y%m%d")
|
||||||
|
|
||||||
def get_fetchformatteddate(self):
|
def get_fetchformatteddate(self):
|
||||||
if __Date__ <> '':
|
if __Date__ != '':
|
||||||
return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
|
return __Date__[0:4] + '-' + __Date__[4:6] + '-' + __Date__[6:8]
|
||||||
else:
|
else:
|
||||||
return self.get_dtlocal().strftime("%Y-%m-%d")
|
return self.get_dtlocal().strftime("%Y-%m-%d")
|
||||||
|
|
||||||
def get_fetchyear(self):
|
def get_fetchyear(self):
|
||||||
if __Date__ <> '':
|
if __Date__ != '':
|
||||||
return __Date__[0:4]
|
return __Date__[0:4]
|
||||||
else:
|
else:
|
||||||
return self.get_dtlocal().strftime("%Y")
|
return self.get_dtlocal().strftime("%Y")
|
||||||
|
|
||||||
def get_fetchmonth(self):
|
def get_fetchmonth(self):
|
||||||
if __Date__ <> '':
|
if __Date__ != '':
|
||||||
return __Date__[4:6]
|
return __Date__[4:6]
|
||||||
else:
|
else:
|
||||||
return self.get_dtlocal().strftime("%m")
|
return self.get_dtlocal().strftime("%m")
|
||||||
|
|
||||||
def get_fetchday(self):
|
def get_fetchday(self):
|
||||||
if __Date__ <> '':
|
if __Date__ != '':
|
||||||
return __Date__[6:8]
|
return __Date__[6:8]
|
||||||
else:
|
else:
|
||||||
return self.get_dtlocal().strftime("%d")
|
return self.get_dtlocal().strftime("%d")
|
||||||
@ -85,7 +88,9 @@ class AppleDaily(BasicNewsRecipe):
|
|||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
soup = self.index_to_soup('http://www.am730.com.hk')
|
soup = self.index_to_soup('http://www.am730.com.hk')
|
||||||
cover = 'http://www.am730.com.hk/' + soup.find(attrs={'id':'mini_news_img'}).find('img').get('src', False)
|
cover = 'http://www.am730.com.hk/' + \
|
||||||
|
soup.find(attrs={'id': 'mini_news_img'}).find(
|
||||||
|
'img').get('src', False)
|
||||||
br = BasicNewsRecipe.get_browser(self)
|
br = BasicNewsRecipe.get_browser(self)
|
||||||
try:
|
try:
|
||||||
br.open(cover)
|
br.open(cover)
|
||||||
@ -97,7 +102,7 @@ class AppleDaily(BasicNewsRecipe):
|
|||||||
if first and hasattr(self, 'add_toc_thumbnail'):
|
if first and hasattr(self, 'add_toc_thumbnail'):
|
||||||
picdiv = soup.find('img')
|
picdiv = soup.find('img')
|
||||||
if picdiv is not None:
|
if picdiv is not None:
|
||||||
self.add_toc_thumbnail(article,picdiv['src'])
|
self.add_toc_thumbnail(article, picdiv['src'])
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
feeds = []
|
feeds = []
|
||||||
@ -123,7 +128,8 @@ class AppleDaily(BasicNewsRecipe):
|
|||||||
mi.publisher = __appname__
|
mi.publisher = __appname__
|
||||||
mi.author_sort = __appname__
|
mi.author_sort = __appname__
|
||||||
if self.publication_type:
|
if self.publication_type:
|
||||||
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
|
mi.publication_type = 'periodical:' + \
|
||||||
|
self.publication_type + ':' + self.short_title()
|
||||||
mi.timestamp = nowf()
|
mi.timestamp = nowf()
|
||||||
article_titles, aseen = [], set()
|
article_titles, aseen = [], set()
|
||||||
for f in feeds:
|
for f in feeds:
|
||||||
@ -136,15 +142,15 @@ class AppleDaily(BasicNewsRecipe):
|
|||||||
if not isinstance(mi.comments, unicode):
|
if not isinstance(mi.comments, unicode):
|
||||||
mi.comments = mi.comments.decode('utf-8', 'replace')
|
mi.comments = mi.comments.decode('utf-8', 'replace')
|
||||||
mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
|
mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
|
||||||
'\n\n'.join(article_titles))
|
'\n\n'.join(article_titles))
|
||||||
|
|
||||||
language = canonicalize_lang(self.language)
|
language = canonicalize_lang(self.language)
|
||||||
if language is not None:
|
if language is not None:
|
||||||
mi.language = language
|
mi.language = language
|
||||||
# This one affects the pub date shown in kindle title
|
# This one affects the pub date shown in kindle title
|
||||||
#mi.pubdate = nowf()
|
|
||||||
# now appears to need the time field to be > 12.00noon as well
|
# now appears to need the time field to be > 12.00noon as well
|
||||||
mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
|
mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(
|
||||||
|
self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
|
||||||
opf_path = os.path.join(dir, 'index.opf')
|
opf_path = os.path.join(dir, 'index.opf')
|
||||||
ncx_path = os.path.join(dir, 'index.ncx')
|
ncx_path = os.path.join(dir, 'index.ncx')
|
||||||
|
|
||||||
@ -153,12 +159,14 @@ class AppleDaily(BasicNewsRecipe):
|
|||||||
mp = getattr(self, 'masthead_path', None)
|
mp = getattr(self, 'masthead_path', None)
|
||||||
if mp is not None and os.access(mp, os.R_OK):
|
if mp is not None and os.access(mp, os.R_OK):
|
||||||
from calibre.ebooks.metadata.opf2 import Guide
|
from calibre.ebooks.metadata.opf2 import Guide
|
||||||
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
|
ref = Guide.Reference(os.path.basename(
|
||||||
|
self.masthead_path), os.getcwdu())
|
||||||
ref.type = 'masthead'
|
ref.type = 'masthead'
|
||||||
ref.title = 'Masthead Image'
|
ref.title = 'Masthead Image'
|
||||||
opf.guide.append(ref)
|
opf.guide.append(ref)
|
||||||
|
|
||||||
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
|
manifest = [os.path.join(dir, 'feed_%d' % i)
|
||||||
|
for i in range(len(feeds))]
|
||||||
manifest.append(os.path.join(dir, 'index.html'))
|
manifest.append(os.path.join(dir, 'index.html'))
|
||||||
manifest.append(os.path.join(dir, 'index.ncx'))
|
manifest.append(os.path.join(dir, 'index.ncx'))
|
||||||
|
|
||||||
@ -167,7 +175,7 @@ class AppleDaily(BasicNewsRecipe):
|
|||||||
if cpath is None:
|
if cpath is None:
|
||||||
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
|
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
|
||||||
if self.default_cover(pf):
|
if self.default_cover(pf):
|
||||||
cpath = pf.name
|
cpath = pf.name
|
||||||
if cpath is not None and os.access(cpath, os.R_OK):
|
if cpath is not None and os.access(cpath, os.R_OK):
|
||||||
opf.cover = cpath
|
opf.cover = cpath
|
||||||
manifest.append(cpath)
|
manifest.append(cpath)
|
||||||
@ -189,12 +197,11 @@ class AppleDaily(BasicNewsRecipe):
|
|||||||
self.play_order_counter = 0
|
self.play_order_counter = 0
|
||||||
self.play_order_map = {}
|
self.play_order_map = {}
|
||||||
|
|
||||||
|
|
||||||
def feed_index(num, parent):
|
def feed_index(num, parent):
|
||||||
f = feeds[num]
|
f = feeds[num]
|
||||||
for j, a in enumerate(f):
|
for j, a in enumerate(f):
|
||||||
if getattr(a, 'downloaded', False):
|
if getattr(a, 'downloaded', False):
|
||||||
adir = 'feed_%d/article_%d/'%(num, j)
|
adir = 'feed_%d/article_%d/' % (num, j)
|
||||||
auth = a.author
|
auth = a.author
|
||||||
if not auth:
|
if not auth:
|
||||||
auth = None
|
auth = None
|
||||||
@ -204,16 +211,18 @@ class AppleDaily(BasicNewsRecipe):
|
|||||||
else:
|
else:
|
||||||
desc = self.description_limiter(desc)
|
desc = self.description_limiter(desc)
|
||||||
tt = a.toc_thumbnail if a.toc_thumbnail else None
|
tt = a.toc_thumbnail if a.toc_thumbnail else None
|
||||||
entries.append('%sindex.html'%adir)
|
entries.append('%sindex.html' % adir)
|
||||||
po = self.play_order_map.get(entries[-1], None)
|
po = self.play_order_map.get(entries[-1], None)
|
||||||
if po is None:
|
if po is None:
|
||||||
self.play_order_counter += 1
|
self.play_order_counter += 1
|
||||||
po = self.play_order_counter
|
po = self.play_order_counter
|
||||||
parent.add_item('%sindex.html'%adir, None,
|
parent.add_item('%sindex.html' % adir, None,
|
||||||
a.title if a.title else _('Untitled Article'),
|
a.title if a.title else _(
|
||||||
play_order=po, author=auth,
|
'Untitled Article'),
|
||||||
description=desc, toc_thumbnail=tt)
|
play_order=po, author=auth,
|
||||||
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
|
description=desc, toc_thumbnail=tt)
|
||||||
|
last = os.path.join(
|
||||||
|
self.output_dir, ('%sindex.html' % adir).replace('/', os.sep))
|
||||||
for sp in a.sub_pages:
|
for sp in a.sub_pages:
|
||||||
prefix = os.path.commonprefix([opf_path, sp])
|
prefix = os.path.commonprefix([opf_path, sp])
|
||||||
relp = sp[len(prefix):]
|
relp = sp[len(prefix):]
|
||||||
@ -226,12 +235,14 @@ class AppleDaily(BasicNewsRecipe):
|
|||||||
soup = BeautifulSoup(src)
|
soup = BeautifulSoup(src)
|
||||||
body = soup.find('body')
|
body = soup.find('body')
|
||||||
if body is not None:
|
if body is not None:
|
||||||
prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
|
prefix = '/'.join('..'for i in range(2 *
|
||||||
|
len(re.findall(r'link\d+', last))))
|
||||||
templ = self.navbar.generate(True, num, j, len(f),
|
templ = self.navbar.generate(True, num, j, len(f),
|
||||||
not self.has_single_feed,
|
not self.has_single_feed,
|
||||||
a.orig_url, __appname__, prefix=prefix,
|
a.orig_url, __appname__, prefix=prefix,
|
||||||
center=self.center_navbar)
|
center=self.center_navbar)
|
||||||
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
|
elem = BeautifulSoup(templ.render(
|
||||||
|
doctype='xhtml').decode('utf-8')).find('div')
|
||||||
body.insert(len(body.contents), elem)
|
body.insert(len(body.contents), elem)
|
||||||
with open(last, 'wb') as fi:
|
with open(last, 'wb') as fi:
|
||||||
fi.write(unicode(soup).encode('utf-8'))
|
fi.write(unicode(soup).encode('utf-8'))
|
||||||
@ -240,7 +251,7 @@ class AppleDaily(BasicNewsRecipe):
|
|||||||
|
|
||||||
if len(feeds) > 1:
|
if len(feeds) > 1:
|
||||||
for i, f in enumerate(feeds):
|
for i, f in enumerate(feeds):
|
||||||
entries.append('feed_%d/index.html'%i)
|
entries.append('feed_%d/index.html' % i)
|
||||||
po = self.play_order_map.get(entries[-1], None)
|
po = self.play_order_map.get(entries[-1], None)
|
||||||
if po is None:
|
if po is None:
|
||||||
self.play_order_counter += 1
|
self.play_order_counter += 1
|
||||||
@ -251,11 +262,11 @@ class AppleDaily(BasicNewsRecipe):
|
|||||||
desc = getattr(f, 'description', None)
|
desc = getattr(f, 'description', None)
|
||||||
if not desc:
|
if not desc:
|
||||||
desc = None
|
desc = None
|
||||||
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
|
feed_index(i, toc.add_item('feed_%d/index.html' % i, None,
|
||||||
f.title, play_order=po, description=desc, author=auth))
|
f.title, play_order=po, description=desc, author=auth))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
entries.append('feed_%d/index.html'%0)
|
entries.append('feed_%d/index.html' % 0)
|
||||||
feed_index(0, toc)
|
feed_index(0, toc)
|
||||||
|
|
||||||
for i, p in enumerate(entries):
|
for i, p in enumerate(entries):
|
||||||
@ -265,5 +276,3 @@ class AppleDaily(BasicNewsRecipe):
|
|||||||
|
|
||||||
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
|
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
|
||||||
opf.render(opf_file, ncx_file)
|
opf.render(opf_file, ncx_file)
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008-2015, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2015, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
ambito.com
|
ambito.com
|
||||||
@ -6,46 +6,46 @@ ambito.com
|
|||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class Ambito(BasicNewsRecipe):
|
class Ambito(BasicNewsRecipe):
|
||||||
title = 'Ambito.com'
|
title = 'Ambito.com'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Ambito.com con noticias del Diario Ambito Financiero de Buenos Aires'
|
description = 'Ambito.com con noticias del Diario Ambito Financiero de Buenos Aires'
|
||||||
publisher = 'Editorial Nefir S.A.'
|
publisher = 'Editorial Nefir S.A.'
|
||||||
category = 'news, politics, economy, finances, Argentina'
|
category = 'news, politics, economy, finances, Argentina'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
masthead_url = 'http://www.ambito.com/img/logo.jpg'
|
masthead_url = 'http://www.ambito.com/img/logo.jpg'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
language = 'es_AR'
|
language = 'es_AR'
|
||||||
publication_type = 'newsportal'
|
publication_type = 'newsportal'
|
||||||
extra_css = """
|
extra_css = """
|
||||||
body{font-family: "Trebuchet MS",Verdana,sans-serif}
|
body{font-family: "Trebuchet MS",Verdana,sans-serif}
|
||||||
.volanta{font-size: small}
|
.volanta{font-size: small}
|
||||||
.t2_portada{font-size: xx-large; font-family: Georgia,serif; color: #026698}
|
.t2_portada{font-size: xx-large; font-family: Georgia,serif; color: #026698}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||||
, 'tags' : category
|
}
|
||||||
, 'publisher' : publisher
|
|
||||||
, 'language' : language
|
|
||||||
}
|
|
||||||
|
|
||||||
keep_only_tags = [dict(attrs={'id':['tituloDespliegue','imgDesp','textoDespliegue']})]
|
keep_only_tags = [
|
||||||
remove_tags = [dict(name=['object','link','embed','iframe','meta','link'])]
|
dict(attrs={'id': ['tituloDespliegue', 'imgDesp', 'textoDespliegue']})]
|
||||||
|
remove_tags = [
|
||||||
|
dict(name=['object', 'link', 'embed', 'iframe', 'meta', 'link'])]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Principales Noticias', u'http://www.ambito.com/rss/noticiasp.asp' )
|
|
||||||
,(u'Economia' , u'http://www.ambito.com/rss/noticias.asp?S=Econom%EDa' )
|
(u'Principales Noticias', u'http://www.ambito.com/rss/noticiasp.asp'),
|
||||||
,(u'Politica' , u'http://www.ambito.com/rss/noticias.asp?S=Pol%EDtica' )
|
(u'Economia', u'http://www.ambito.com/rss/noticias.asp?S=Econom%EDa'),
|
||||||
,(u'Informacion General' , u'http://www.ambito.com/rss/noticias.asp?S=Informaci%F3n%20General')
|
(u'Politica', u'http://www.ambito.com/rss/noticias.asp?S=Pol%EDtica'),
|
||||||
,(u'Campo' , u'http://www.ambito.com/rss/noticias.asp?S=Agro' )
|
(u'Informacion General', u'http://www.ambito.com/rss/noticias.asp?S=Informaci%F3n%20General'),
|
||||||
,(u'Internacionales' , u'http://www.ambito.com/rss/noticias.asp?S=Internacionales' )
|
(u'Campo', u'http://www.ambito.com/rss/noticias.asp?S=Agro'),
|
||||||
,(u'Deportes' , u'http://www.ambito.com/rss/noticias.asp?S=Deportes' )
|
(u'Internacionales', u'http://www.ambito.com/rss/noticias.asp?S=Internacionales'),
|
||||||
,(u'Espectaculos' , u'http://www.ambito.com/rss/noticias.asp?S=Espect%E1culos' )
|
(u'Deportes', u'http://www.ambito.com/rss/noticias.asp?S=Deportes'),
|
||||||
,(u'Tecnologia' , u'http://www.ambito.com/rss/noticias.asp?S=Tecnolog%EDa' )
|
(u'Espectaculos', u'http://www.ambito.com/rss/noticias.asp?S=Espect%E1culos'),
|
||||||
,(u'Ambito Nacional' , u'http://www.ambito.com/rss/noticias.asp?S=Ambito%20Nacional' )
|
(u'Tecnologia', u'http://www.ambito.com/rss/noticias.asp?S=Tecnolog%EDa'),
|
||||||
]
|
(u'Ambito Nacional', u'http://www.ambito.com/rss/noticias.asp?S=Ambito%20Nacional')
|
||||||
|
]
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
ambito.com/diario
|
ambito.com/diario
|
||||||
@ -8,22 +8,23 @@ import time
|
|||||||
from calibre import strftime
|
from calibre import strftime
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class Ambito_Financiero(BasicNewsRecipe):
|
class Ambito_Financiero(BasicNewsRecipe):
|
||||||
title = 'Ambito Financiero'
|
title = 'Ambito Financiero'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Informacion Libre las 24 horas'
|
description = 'Informacion Libre las 24 horas'
|
||||||
publisher = 'Editorial Nefir S.A.'
|
publisher = 'Editorial Nefir S.A.'
|
||||||
category = 'news, politics, economy, Argentina'
|
category = 'news, politics, economy, Argentina'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
masthead_url = 'http://www.ambito.com/diario/img/logo_af.gif'
|
masthead_url = 'http://www.ambito.com/diario/img/logo_af.gif'
|
||||||
publication_type = 'newspaper'
|
publication_type = 'newspaper'
|
||||||
needs_subscription = 'optional'
|
needs_subscription = 'optional'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'es_AR'
|
language = 'es_AR'
|
||||||
PREFIX = 'http://www.ambito.com'
|
PREFIX = 'http://www.ambito.com'
|
||||||
INDEX = PREFIX + '/diario/index.asp'
|
INDEX = PREFIX + '/diario/index.asp'
|
||||||
LOGIN = PREFIX + '/diario/login/entrada.asp'
|
LOGIN = PREFIX + '/diario/login/entrada.asp'
|
||||||
extra_css = """
|
extra_css = """
|
||||||
body{font-family: "Trebuchet MS",Verdana,sans-serif}
|
body{font-family: "Trebuchet MS",Verdana,sans-serif}
|
||||||
.volanta{font-size: small}
|
.volanta{font-size: small}
|
||||||
@ -31,14 +32,12 @@ class Ambito_Financiero(BasicNewsRecipe):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||||
, 'tags' : category
|
}
|
||||||
, 'publisher' : publisher
|
|
||||||
, 'language' : language
|
|
||||||
}
|
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'align':'justify'})]
|
keep_only_tags = [dict(name='div', attrs={'align': 'justify'})]
|
||||||
remove_tags = [dict(name=['object','link','embed','iframe','meta','link','table','img'])]
|
remove_tags = [dict(name=['object', 'link', 'embed',
|
||||||
|
'iframe', 'meta', 'link', 'table', 'img'])]
|
||||||
remove_attributes = ['align']
|
remove_attributes = ['align']
|
||||||
|
|
||||||
def get_browser(self):
|
def get_browser(self):
|
||||||
@ -53,7 +52,7 @@ class Ambito_Financiero(BasicNewsRecipe):
|
|||||||
return br
|
return br
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url.replace('/diario/noticia.asp?','/noticias/imprimir.asp?')
|
return url.replace('/diario/noticia.asp?', '/noticias/imprimir.asp?')
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
@ -61,27 +60,24 @@ class Ambito_Financiero(BasicNewsRecipe):
|
|||||||
for item in soup.findAll('a'):
|
for item in soup.findAll('a'):
|
||||||
str = item.string
|
str = item.string
|
||||||
if str is None:
|
if str is None:
|
||||||
str = self.tag_to_string(item)
|
str = self.tag_to_string(item)
|
||||||
item.replaceWith(str)
|
item.replaceWith(str)
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
soup = self.index_to_soup(self.INDEX)
|
soup = self.index_to_soup(self.INDEX)
|
||||||
cover_item = soup.find('img',attrs={'class':'fotodespliegue'})
|
cover_item = soup.find('img', attrs={'class': 'fotodespliegue'})
|
||||||
if cover_item:
|
if cover_item:
|
||||||
self.cover_url = self.PREFIX + cover_item['src']
|
self.cover_url = self.PREFIX + cover_item['src']
|
||||||
articles = []
|
articles = []
|
||||||
checker = []
|
checker = []
|
||||||
for feed_link in soup.findAll('a', attrs={'class':['t0_portada','t2_portada','bajada']}):
|
for feed_link in soup.findAll('a', attrs={'class': ['t0_portada', 't2_portada', 'bajada']}):
|
||||||
url = self.PREFIX + feed_link['href']
|
url = self.PREFIX + feed_link['href']
|
||||||
title = self.tag_to_string(feed_link)
|
title = self.tag_to_string(feed_link)
|
||||||
date = strftime("%a, %d %b %Y %H:%M:%S +0000",time.gmtime())
|
date = strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())
|
||||||
if url not in checker:
|
if url not in checker:
|
||||||
checker.append(url)
|
checker.append(url)
|
||||||
articles.append({
|
articles.append({
|
||||||
'title' :title
|
'title': title, 'date': date, 'url': url, 'description': u''
|
||||||
,'date' :date
|
})
|
||||||
,'url' :url
|
|
||||||
,'description':u''
|
|
||||||
})
|
|
||||||
return [(self.title, articles)]
|
return [(self.title, articles)]
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2010, Walt Anthony <workshop.northpole at gmail.com>'
|
__copyright__ = '2010, Walt Anthony <workshop.northpole at gmail.com>'
|
||||||
'''
|
'''
|
||||||
www.americanthinker.com
|
www.americanthinker.com
|
||||||
@ -8,37 +8,34 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
from calibre.utils.cleantext import clean_xml_chars
|
from calibre.utils.cleantext import clean_xml_chars
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
|
|
||||||
class AmericanThinker(BasicNewsRecipe):
|
class AmericanThinker(BasicNewsRecipe):
|
||||||
title = u'American Thinker'
|
title = u'American Thinker'
|
||||||
description = "American Thinker is a daily internet publication devoted to the thoughtful exploration of issues of importance to Americans."
|
description = "American Thinker is a daily internet publication devoted to the thoughtful exploration of issues of importance to Americans."
|
||||||
__author__ = 'Walt Anthony'
|
__author__ = 'Walt Anthony'
|
||||||
publisher = 'Thomas Lifson'
|
publisher = 'Thomas Lifson'
|
||||||
category = 'news, politics, USA'
|
category = 'news, politics, USA'
|
||||||
oldest_article = 7 # days
|
oldest_article = 7 # days
|
||||||
max_articles_per_feed = 50
|
max_articles_per_feed = 50
|
||||||
summary_length = 150
|
summary_length = 150
|
||||||
language = 'en'
|
language = 'en'
|
||||||
ignore_duplicate_articles = {'title', 'url'}
|
ignore_duplicate_articles = {'title', 'url'}
|
||||||
|
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
auto_cleanup = True
|
auto_cleanup = True
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'linearize_tables': True
|
||||||
, 'tags' : category
|
}
|
||||||
, 'publisher' : publisher
|
|
||||||
, 'language' : language
|
|
||||||
, 'linearize_tables' : True
|
|
||||||
}
|
|
||||||
|
|
||||||
def preprocess_raw_html(self, raw, url):
|
def preprocess_raw_html(self, raw, url):
|
||||||
root = html5lib.parse(
|
root = html5lib.parse(
|
||||||
clean_xml_chars(raw), treebuilder='lxml',
|
clean_xml_chars(raw), treebuilder='lxml',
|
||||||
namespaceHTMLElements=False)
|
namespaceHTMLElements=False)
|
||||||
for x in root.xpath('''descendant-or-self::*[@class and contains(concat(' ', normalize-space(@class), ' '), ' article_body ') and (@class and contains(concat(' ', normalize-space(@class), ' '), ' bottom '))]'''):
|
for x in root.xpath('''descendant-or-self::*[@class and contains(concat(' ', normalize-space(@class), ' '), ' article_body ') and (@class and contains(concat(' ', normalize-space(@class), ' '), ' bottom '))]'''): # noqa
|
||||||
x.getparent().remove(x)
|
x.getparent().remove(x)
|
||||||
return etree.tostring(root, encoding=unicode)
|
return etree.tostring(root, encoding=unicode)
|
||||||
|
|
||||||
feeds = [(u'http://feeds.feedburner.com/americanthinker'),
|
feeds = [(u'http://feeds.feedburner.com/americanthinker'),
|
||||||
(u'http://feeds.feedburner.com/AmericanThinkerBlog')
|
(u'http://feeds.feedburner.com/AmericanThinkerBlog')
|
||||||
]
|
]
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
spectator.org
|
spectator.org
|
||||||
@ -7,20 +7,22 @@ spectator.org
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from css_selectors import Select
|
from css_selectors import Select
|
||||||
|
|
||||||
|
|
||||||
class TheAmericanSpectator(BasicNewsRecipe):
|
class TheAmericanSpectator(BasicNewsRecipe):
|
||||||
title = 'The American Spectator'
|
title = 'The American Spectator'
|
||||||
__author__ = 'Kovid Goyal'
|
__author__ = 'Kovid Goyal'
|
||||||
description = 'News from USA'
|
description = 'News from USA'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'en'
|
language = 'en'
|
||||||
auto_cleanup = True
|
auto_cleanup = True
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
root = self.index_to_soup('http://spectator.org/issues/current', as_tree=True)
|
root = self.index_to_soup(
|
||||||
|
'http://spectator.org/issues/current', as_tree=True)
|
||||||
select = Select(root)
|
select = Select(root)
|
||||||
main = tuple(select('div#block-system-main'))[0]
|
main = tuple(select('div#block-system-main'))[0]
|
||||||
feeds = []
|
feeds = []
|
||||||
@ -43,7 +45,8 @@ class TheAmericanSpectator(BasicNewsRecipe):
|
|||||||
for x in select('div.views-field-field-short-summary', li):
|
for x in select('div.views-field-field-short-summary', li):
|
||||||
desc = self.tag_to_string(x)
|
desc = self.tag_to_string(x)
|
||||||
break
|
break
|
||||||
articles.append({'title':title, 'url':url, 'description':desc})
|
articles.append(
|
||||||
|
{'title': title, 'url': url, 'description': desc})
|
||||||
self.log('\t', title, 'at', url)
|
self.log('\t', title, 'at', url)
|
||||||
feeds.append((section_title, articles))
|
feeds.append((section_title, articles))
|
||||||
return feeds
|
return feeds
|
||||||
|
@ -1,12 +1,13 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class AnDrumaMor(BasicNewsRecipe):
|
class AnDrumaMor(BasicNewsRecipe):
|
||||||
title = u'An Druma M\xf3r'
|
title = u'An Druma M\xf3r'
|
||||||
__author__ = "David O'Callaghan"
|
__author__ = "David O'Callaghan"
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
language = 'ga'
|
language = 'ga'
|
||||||
use_embedded_content = True
|
use_embedded_content = True
|
||||||
|
|
||||||
feeds = [(u'Nuacht Laeth\xfail', u'http://feeds.feedburner.com/NuachtLneLaethilArAnDrumaMr')]
|
feeds = [(u'Nuacht Laeth\xfail',
|
||||||
|
u'http://feeds.feedburner.com/NuachtLneLaethilArAnDrumaMr')]
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
'''
|
'''
|
||||||
@ -13,10 +13,10 @@ class anan(BasicNewsRecipe):
|
|||||||
title = 'Anandtech'
|
title = 'Anandtech'
|
||||||
description = 'comprehensive Hardware Tests'
|
description = 'comprehensive Hardware Tests'
|
||||||
__author__ = 'Oliver Niesner, Armin Geller' # 2014-02-27 AGE: update
|
__author__ = 'Oliver Niesner, Armin Geller' # 2014-02-27 AGE: update
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'en'
|
language = 'en'
|
||||||
timefmt = ' [%d %b %Y]'
|
timefmt = ' [%d %b %Y]'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 40
|
max_articles_per_feed = 40
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
@ -26,17 +26,17 @@ class anan(BasicNewsRecipe):
|
|||||||
masthead_url = 'http://www.anandtech.com/content/images/globals/printheader.png'
|
masthead_url = 'http://www.anandtech.com/content/images/globals/printheader.png'
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='section', attrs={'class':['main_cont']}),
|
dict(name='section', attrs={'class': ['main_cont']}),
|
||||||
]
|
]
|
||||||
remove_tags=[
|
remove_tags = [
|
||||||
dict(name='div', attrs={'class':['print',
|
dict(name='div', attrs={'class': ['print',
|
||||||
'breadcrumb_area noprint',
|
'breadcrumb_area noprint',
|
||||||
'fl-rt noprint',
|
'fl-rt noprint',
|
||||||
'blog_top_right',]})
|
'blog_top_right', ]})
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [('Anandtech', 'http://www.anandtech.com/rss/')]
|
feeds = [('Anandtech', 'http://www.anandtech.com/rss/')]
|
||||||
|
|
||||||
def print_version(self,url):
|
def print_version(self, url):
|
||||||
# return url.replace("0Cshow0C", "0Cprint0C") # 2013-09-07 AGE: update
|
# return url.replace("0Cshow0C", "0Cprint0C") # 2013-09-07 AGE: update
|
||||||
return url.replace("/show/", "/print/") # 2014-02-27 AGE: update
|
return url.replace("/show/", "/print/") # 2014-02-27 AGE: update
|
||||||
|
@ -1,38 +1,28 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1278347258(BasicNewsRecipe):
|
class AdvancedUserRecipe1278347258(BasicNewsRecipe):
|
||||||
title = u'Anchorage Daily News'
|
title = u'Anchorage Daily News'
|
||||||
__author__ = 'rty'
|
__author__ = 'rty'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
auto_cleanup = True
|
auto_cleanup = True
|
||||||
|
|
||||||
|
feeds = [(u'Alaska News', u'http://www.adn.com/rss-feeds/feed/all'),
|
||||||
feeds = [(u'Alaska News', u'http://www.adn.com/rss-feeds/feed/all'),
|
(u'Politics', u'http://www.adn.com/rss-feeds/feed/politics'),
|
||||||
(u'Politics', u'http://www.adn.com/rss-feeds/feed/politics'),
|
]
|
||||||
]
|
|
||||||
description = ''''Alaska's Newspaper'''
|
description = ''''Alaska's Newspaper'''
|
||||||
publisher = 'http://www.adn.com'
|
publisher = 'http://www.adn.com'
|
||||||
category = 'news, Alaska, Anchorage'
|
category = 'news, Alaska, Anchorage'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
p{font-weight: normal;text-align: justify}
|
p{font-weight: normal;text-align: justify}
|
||||||
'''
|
'''
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
language = 'en'
|
language = 'en'
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
conversion_options = {'linearize_tables':True}
|
conversion_options = {'linearize_tables': True}
|
||||||
masthead_url = 'http://media.adn.com/includes/assets/images/adn_logo.2.gif'
|
masthead_url = 'http://media.adn.com/includes/assets/images/adn_logo.2.gif'
|
||||||
|
|
||||||
#keep_only_tags = [
|
|
||||||
#dict(name='div', attrs={'class':'left_col story_mainbar'}),
|
|
||||||
#]
|
|
||||||
#remove_tags = [
|
|
||||||
#dict(name='div', attrs={'class':'story_tools'}),
|
|
||||||
#dict(name='p', attrs={'class':'ad_label'}),
|
|
||||||
#]
|
|
||||||
#remove_tags_after = [
|
|
||||||
#dict(name='div', attrs={'class':'advertisement'}),
|
|
||||||
#]
|
|
||||||
|
@ -1,15 +1,17 @@
|
|||||||
import re
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class Android_com_pl(BasicNewsRecipe):
|
class Android_com_pl(BasicNewsRecipe):
|
||||||
title = u'Android.com.pl'
|
title = u'Android.com.pl'
|
||||||
__author__ = 'fenuks'
|
__author__ = 'fenuks'
|
||||||
description = u'Android.com.pl - to największe w Polsce centrum Android OS. Znajdziesz tu: nowości, forum, pomoc, recenzje, gry, aplikacje.'
|
description = u'Android.com.pl - to największe w Polsce centrum Android OS. Znajdziesz tu: nowości, forum, pomoc, recenzje, gry, aplikacje.'
|
||||||
category = 'Android, mobile'
|
category = 'Android, mobile'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
use_embedded_content = True
|
use_embedded_content = True
|
||||||
cover_url = 'http://android.com.pl/wp-content/themes/android/images/logo.png'
|
cover_url = 'http://android.com.pl/wp-content/themes/android/images/logo.png'
|
||||||
oldest_article = 8
|
oldest_article = 8
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
preprocess_regexps = [(re.compile(ur'<p>.{,1}</p>', re.DOTALL), lambda match: '')]
|
preprocess_regexps = [
|
||||||
feeds = [(u'Android', u'http://android.com.pl/feed/')]
|
(re.compile(ur'<p>.{,1}</p>', re.DOTALL), lambda match: '')]
|
||||||
|
feeds = [(u'Android', u'http://android.com.pl/feed/')]
|
||||||
|
@ -3,43 +3,43 @@
|
|||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1290663986(BasicNewsRecipe):
|
class AdvancedUserRecipe1290663986(BasicNewsRecipe):
|
||||||
title = u'Animal Pol\u00EDtico'
|
title = u'Animal Pol\u00EDtico'
|
||||||
publisher = u'Animal Pol\u00EDtico'
|
publisher = u'Animal Pol\u00EDtico'
|
||||||
category = u'News, Mexico'
|
category = u'News, Mexico'
|
||||||
description = u'Noticias Pol\u00EDticas'
|
description = u'Noticias Pol\u00EDticas'
|
||||||
__author__ = 'leamsi'
|
__author__ = 'leamsi'
|
||||||
masthead_url = 'http://www.animalpolitico.com/wp-content/themes/animal_mu/images/logo.png'
|
masthead_url = 'http://www.animalpolitico.com/wp-content/themes/animal_mu/images/logo.png'
|
||||||
oldest_article = 1
|
oldest_article = 1
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
language = 'es_MX'
|
language = 'es_MX'
|
||||||
|
|
||||||
#feeds = [(u'Animal Politico', u'http://www.animalpolitico.com/feed/')]
|
|
||||||
|
|
||||||
remove_tags_before = dict(name='div', id='main')
|
remove_tags_before = dict(name='div', id='main')
|
||||||
remove_tags = [dict(name='div', attrs={'class':'fb-like-button'})]
|
remove_tags = [dict(name='div', attrs={'class': 'fb-like-button'})]
|
||||||
keep_only_tags = [dict(name='h1', attrs={'class':'entry-title'}),
|
keep_only_tags = [dict(name='h1', attrs={'class': 'entry-title'}),
|
||||||
dict(name='div', attrs={'class':'entry-content'})]
|
dict(name='div', attrs={'class': 'entry-content'})]
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
INDEX = 'http://www.animalpolitico.com/'
|
INDEX = 'http://www.animalpolitico.com/'
|
||||||
|
|
||||||
def generic_parse(self, soup):
|
def generic_parse(self, soup):
|
||||||
articles = []
|
articles = []
|
||||||
for entry in soup.findAll(lambda tag: tag.name == 'li' and tag.has_key('class') and tag['class'].find('hentry') != -1): #soup.findAll('li', 'hentry'):
|
# soup.findAll('li', 'hentry'):
|
||||||
article_url = entry.a['href'] + '?print=yes'
|
for entry in soup.findAll(lambda tag: tag.name == 'li' and tag.has_key('class') and tag['class'].find('hentry') != -1): # noqa
|
||||||
article_title= entry.find('h3', 'entry-title')
|
article_url = entry.a['href'] + '?print=yes'
|
||||||
article_title= self.tag_to_string(article_title)
|
article_title = entry.find('h3', 'entry-title')
|
||||||
|
article_title = self.tag_to_string(article_title)
|
||||||
article_date = entry.find('span', 'the-time')
|
article_date = entry.find('span', 'the-time')
|
||||||
article_date = self.tag_to_string(article_date)
|
article_date = self.tag_to_string(article_date)
|
||||||
article_desc = self.tag_to_string(entry.find('p'))
|
article_desc = self.tag_to_string(entry.find('p'))
|
||||||
|
|
||||||
#print 'Article:',article_title, article_date,article_url
|
# print 'Article:',article_title, article_date,article_url
|
||||||
#print entry['class']
|
# print entry['class']
|
||||||
|
|
||||||
articles.append({'title' : article_title,
|
articles.append({'title': article_title,
|
||||||
'date' : article_date,
|
'date': article_date,
|
||||||
'description' : article_desc,
|
'description': article_desc,
|
||||||
'url' : article_url})
|
'url': article_url})
|
||||||
# Avoid including the multimedia stuff.
|
# Avoid including the multimedia stuff.
|
||||||
if entry['class'].find('last') != -1:
|
if entry['class'].find('last') != -1:
|
||||||
break
|
break
|
||||||
@ -48,56 +48,57 @@ class AdvancedUserRecipe1290663986(BasicNewsRecipe):
|
|||||||
|
|
||||||
def plumaje_parse(self, soup):
|
def plumaje_parse(self, soup):
|
||||||
articles = []
|
articles = []
|
||||||
blogs_soup = soup.find(lambda tag: tag.name == 'ul' and tag.has_key('class') and tag['class'].find('bloglist-fecha') != -1)
|
blogs_soup = soup.find(lambda tag: tag.name == 'ul' and tag.has_key('class') and tag['class'].find('bloglist-fecha') != -1) # noqa
|
||||||
for entry in blogs_soup.findAll('li'):
|
for entry in blogs_soup.findAll('li'):
|
||||||
article_title = entry.p
|
article_title = entry.p
|
||||||
article_url = article_title.a['href'] + '?print=yes'
|
article_url = article_title.a['href'] + '?print=yes'
|
||||||
article_date = article_title.nextSibling
|
article_date = article_title.nextSibling
|
||||||
article_title = self.tag_to_string(article_title)
|
article_title = self.tag_to_string(article_title)
|
||||||
article_date = self.tag_to_string(article_date).replace(u'Last Updated: ', '')
|
article_date = self.tag_to_string(
|
||||||
article_desc = self.tag_to_string(entry.find('h4'))
|
article_date).replace(u'Last Updated: ', '')
|
||||||
|
article_desc = self.tag_to_string(entry.find('h4'))
|
||||||
|
|
||||||
#print 'Article:',article_title, article_date,article_url
|
# print 'Article:',article_title, article_date,article_url
|
||||||
articles.append({'title' : article_title,
|
articles.append({'title': article_title,
|
||||||
'date' : article_date,
|
'date': article_date,
|
||||||
'description' : article_desc,
|
'description': article_desc,
|
||||||
'url' : article_url})
|
'url': article_url})
|
||||||
|
|
||||||
return articles
|
return articles
|
||||||
|
|
||||||
def boca_parse(self, soup):
|
def boca_parse(self, soup):
|
||||||
articles = []
|
articles = []
|
||||||
for entry in soup.findAll(lambda tag: tag.name == 'div' and tag.has_key('class') and tag['class'].find('hentry') != -1): #soup.findAll('li', 'hentry'):
|
# soup.findAll('li', 'hentry'):
|
||||||
article_title= entry.find('h2', 'entry-title')
|
for entry in soup.findAll(lambda tag: tag.name == 'div' and tag.has_key('class') and tag['class'].find('hentry') != -1): # noqa
|
||||||
article_url = article_title.a['href'] + '?print=yes'
|
article_title = entry.find('h2', 'entry-title')
|
||||||
article_title= self.tag_to_string(article_title)
|
article_url = article_title.a['href'] + '?print=yes'
|
||||||
|
article_title = self.tag_to_string(article_title)
|
||||||
article_date = entry.find('span', 'entry-date')
|
article_date = entry.find('span', 'entry-date')
|
||||||
article_date = self.tag_to_string(article_date)
|
article_date = self.tag_to_string(article_date)
|
||||||
article_desc = self.tag_to_string(entry.find('div', 'entry-content'))
|
article_desc = self.tag_to_string(
|
||||||
|
entry.find('div', 'entry-content'))
|
||||||
|
|
||||||
#print 'Article:',article_title, article_date,article_url
|
# print 'Article:',article_title, article_date,article_url
|
||||||
#print entry['class']
|
# print entry['class']
|
||||||
|
|
||||||
articles.append({'title' : article_title,
|
articles.append({'title': article_title,
|
||||||
'date' : article_date,
|
'date': article_date,
|
||||||
'description' : article_desc,
|
'description': article_desc,
|
||||||
'url' : article_url})
|
'url': article_url})
|
||||||
# Avoid including the multimedia stuff.
|
# Avoid including the multimedia stuff.
|
||||||
if entry['class'].find('last') != -1:
|
if entry['class'].find('last') != -1:
|
||||||
break
|
break
|
||||||
|
|
||||||
return articles
|
return articles
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
gobierno_soup = self.index_to_soup(self.INDEX+'gobierno/')
|
gobierno_soup = self.index_to_soup(self.INDEX + 'gobierno/')
|
||||||
congreso_soup = self.index_to_soup(self.INDEX+'congreso/')
|
congreso_soup = self.index_to_soup(self.INDEX + 'congreso/')
|
||||||
seguridad_soup = self.index_to_soup(self.INDEX+'seguridad/')
|
seguridad_soup = self.index_to_soup(self.INDEX + 'seguridad/')
|
||||||
comunidad_soup = self.index_to_soup(self.INDEX+'comunidad/')
|
comunidad_soup = self.index_to_soup(self.INDEX + 'comunidad/')
|
||||||
plumaje_soup = self.index_to_soup(self.INDEX+'plumaje/')
|
plumaje_soup = self.index_to_soup(self.INDEX + 'plumaje/')
|
||||||
la_boca_del_lobo_soup = self.index_to_soup(self.INDEX+'category/la-boca-del-lobo/')
|
la_boca_del_lobo_soup = self.index_to_soup(
|
||||||
|
self.INDEX + 'category/la-boca-del-lobo/')
|
||||||
|
|
||||||
gobierno_articles = self.generic_parse(gobierno_soup)
|
gobierno_articles = self.generic_parse(gobierno_soup)
|
||||||
congreso_articles = self.generic_parse(congreso_soup)
|
congreso_articles = self.generic_parse(congreso_soup)
|
||||||
@ -106,6 +107,5 @@ class AdvancedUserRecipe1290663986(BasicNewsRecipe):
|
|||||||
plumaje_articles = self.plumaje_parse(plumaje_soup)
|
plumaje_articles = self.plumaje_parse(plumaje_soup)
|
||||||
la_boca_del_lobo_articles = self.boca_parse(la_boca_del_lobo_soup)
|
la_boca_del_lobo_articles = self.boca_parse(la_boca_del_lobo_soup)
|
||||||
|
|
||||||
|
return [(u'Gobierno', gobierno_articles), (u'Congreso', congreso_articles), (u'Seguridad', seguridad_articles),
|
||||||
return [ (u'Gobierno', gobierno_articles), (u'Congreso', congreso_articles), (u'Seguridad', seguridad_articles),
|
(u'Comunidad', comunidad_articles), (u'Plumaje', plumaje_articles), (u'La Boca del Lobo', la_boca_del_lobo_articles), ]
|
||||||
(u'Comunidad', comunidad_articles), (u'Plumaje', plumaje_articles), (u'La Boca del Lobo', la_boca_del_lobo_articles), ]
|
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
#-*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class AntywebRecipe(BasicNewsRecipe):
|
class AntywebRecipe(BasicNewsRecipe):
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
@ -10,38 +11,40 @@ class AntywebRecipe(BasicNewsRecipe):
|
|||||||
title = u'Antyweb'
|
title = u'Antyweb'
|
||||||
category = u'News'
|
category = u'News'
|
||||||
description = u'Blog o internecie i nowych technologiach'
|
description = u'Blog o internecie i nowych technologiach'
|
||||||
cover_url=''
|
cover_url = ''
|
||||||
remove_empty_feeds= True
|
remove_empty_feeds = True
|
||||||
auto_cleanup = False
|
auto_cleanup = False
|
||||||
no_stylesheets=True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
simultaneous_downloads = 10
|
simultaneous_downloads = 10
|
||||||
ignore_duplicate_articles = {'title', 'url'} # zignoruj zduplikowane artykuły o takich samych tytułach LUB adresach
|
# zignoruj zduplikowane artykuły o takich samych tytułach LUB adresach
|
||||||
scale_news_images =True
|
ignore_duplicate_articles = {'title', 'url'}
|
||||||
conversion_options = { 'tags' : u'news, aplikacje mobilne, Android, iOS, Windows Phone ',
|
scale_news_images = True
|
||||||
'smarten_punctuation' : True,
|
conversion_options = {'tags': u'news, aplikacje mobilne, Android, iOS, Windows Phone ',
|
||||||
'publisher' : 'AntyWeb'
|
'smarten_punctuation': True,
|
||||||
} # opcje konwersji.
|
'publisher': 'AntyWeb'
|
||||||
|
} # opcje konwersji.
|
||||||
|
|
||||||
keep_only_tags=[]
|
keep_only_tags = []
|
||||||
keep_only_tags.append(dict(name = 'h1'))
|
keep_only_tags.append(dict(name='h1'))
|
||||||
keep_only_tags.append(dict(name = 'article', attrs = {'class' : 'article'}))
|
keep_only_tags.append(dict(name='article', attrs={'class': 'article'}))
|
||||||
remove_tags =[]
|
remove_tags = []
|
||||||
remove_tags.append(dict(name = 'div', attrs = {'class' : 'ac-footer group'}))
|
remove_tags.append(dict(name='div', attrs={'class': 'ac-footer group'}))
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
(u'News', 'http://feeds.feedburner.com/antyweb'),
|
||||||
|
(u'Felietony', 'http://feeds.feedburner.com/AntywebFelietony'),
|
||||||
|
(u'Apple', 'http://feeds.feedburner.com/AntywebApple'),
|
||||||
|
(u'Gry', 'http://feeds.feedburner.com/AntywebGry'),
|
||||||
|
(u'Mobile', 'http://feeds.feedburner.com/AntywebMobile'),
|
||||||
|
(u'Startups', 'http://feeds.feedburner.com/AntywebStartups'),
|
||||||
|
(u'Google', 'http://feeds.feedburner.com/AntywebGoogle'),
|
||||||
|
(u'Microsoft', 'http://feeds.feedburner.com/AntywebMicrosoft')
|
||||||
|
]
|
||||||
|
|
||||||
feeds = [
|
|
||||||
(u'News', 'http://feeds.feedburner.com/antyweb'),
|
|
||||||
(u'Felietony', 'http://feeds.feedburner.com/AntywebFelietony'),
|
|
||||||
(u'Apple', 'http://feeds.feedburner.com/AntywebApple'),
|
|
||||||
(u'Gry', 'http://feeds.feedburner.com/AntywebGry'),
|
|
||||||
(u'Mobile', 'http://feeds.feedburner.com/AntywebMobile'),
|
|
||||||
(u'Startups', 'http://feeds.feedburner.com/AntywebStartups'),
|
|
||||||
(u'Google', 'http://feeds.feedburner.com/AntywebGoogle'),
|
|
||||||
(u'Microsoft', 'http://feeds.feedburner.com/AntywebMicrosoft')
|
|
||||||
]
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
for alink in soup.findAll('a'):
|
for alink in soup.findAll('a'):
|
||||||
if alink.string is not None:
|
if alink.string is not None:
|
||||||
|
@ -6,21 +6,23 @@ class AssociatedPress(BasicNewsRecipe):
|
|||||||
title = u'Associated Press'
|
title = u'Associated Press'
|
||||||
description = 'Global news'
|
description = 'Global news'
|
||||||
__author__ = 'Krittika Goyal'
|
__author__ = 'Krittika Goyal'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'en'
|
language = 'en'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'linearize_tables' : True
|
'linearize_tables': True
|
||||||
}
|
}
|
||||||
keep_only_tags = {'name':'table', 'attrs':{'class':lambda x: x and 'ap-story-table' in x.split()}}
|
keep_only_tags = {'name': 'table', 'attrs': {
|
||||||
|
'class': lambda x: x and 'ap-story-table' in x.split()}}
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
{'class':['ap-mediabox-table']},
|
{'class': ['ap-mediabox-table']},
|
||||||
{'name':'img', 'src':lambda x: x and '//analytics.' in x},
|
{'name': 'img', 'src': lambda x: x and '//analytics.' in x},
|
||||||
]
|
]
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
feeds = []
|
feeds = []
|
||||||
fronts = ('HOME', 'US', 'WORLD', 'BUSINESS', 'TECHNOLOGY', 'SPORTS', 'ENTERTAINMENT', 'HEALTH', 'SCIENCE', 'POLITICS')
|
fronts = ('HOME', 'US', 'WORLD', 'BUSINESS', 'TECHNOLOGY',
|
||||||
|
'SPORTS', 'ENTERTAINMENT', 'HEALTH', 'SCIENCE', 'POLITICS')
|
||||||
for front in fronts:
|
for front in fronts:
|
||||||
feeds.append([front.capitalize(), self.parse_section(front)])
|
feeds.append([front.capitalize(), self.parse_section(front)])
|
||||||
feeds[0][0] = 'Top Stories'
|
feeds[0][0] = 'Top Stories'
|
||||||
@ -28,19 +30,20 @@ class AssociatedPress(BasicNewsRecipe):
|
|||||||
|
|
||||||
def parse_section(self, front):
|
def parse_section(self, front):
|
||||||
self.log('Processing section:', front)
|
self.log('Processing section:', front)
|
||||||
soup = self.index_to_soup('http://hosted.ap.org/dynamic/fronts/%s?SITE=AP' % front)
|
soup = self.index_to_soup(
|
||||||
|
'http://hosted.ap.org/dynamic/fronts/%s?SITE=AP' % front)
|
||||||
|
|
||||||
articles = []
|
articles = []
|
||||||
for x in soup.findAll('p', attrs={'class':['ap-newsbriefitem-p', 'ap-topheadlineitem-p']}):
|
for x in soup.findAll('p', attrs={'class': ['ap-newsbriefitem-p', 'ap-topheadlineitem-p']}):
|
||||||
a = x.find('a', href=True)
|
a = x.find('a', href=True)
|
||||||
title = self.tag_to_string(a)
|
title = self.tag_to_string(a)
|
||||||
url = "http://hosted.ap.org" + a['href']
|
url = "http://hosted.ap.org" + a['href']
|
||||||
p = x.find(attrs={'class':'topheadlinebody'})
|
p = x.find(attrs={'class': 'topheadlinebody'})
|
||||||
desc = ''
|
desc = ''
|
||||||
if p is not None:
|
if p is not None:
|
||||||
desc = self.tag_to_string(p)
|
desc = self.tag_to_string(p)
|
||||||
self.log('\tFound article:', title, '\n\t\t', desc)
|
self.log('\tFound article:', title, '\n\t\t', desc)
|
||||||
articles.append({'title':title, 'url':url})
|
articles.append({'title': title, 'url': url})
|
||||||
|
|
||||||
self.log('\n\n')
|
self.log('\n\n')
|
||||||
|
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__author__ = 'Gabriele Marini, based on Darko Miletic'
|
__author__ = 'Gabriele Marini, based on Darko Miletic'
|
||||||
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
description = 'Italian daily newspaper - 14-05-2010'
|
description = 'Italian daily newspaper - 14-05-2010'
|
||||||
|
|
||||||
'''
|
'''
|
||||||
http://www.apcom.NET/
|
http://www.apcom.NET/
|
||||||
@ -10,39 +10,38 @@ http://www.apcom.NET/
|
|||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class Apcom(BasicNewsRecipe):
|
class Apcom(BasicNewsRecipe):
|
||||||
__author__ = 'Marini Gabriele'
|
__author__ = 'Marini Gabriele'
|
||||||
description = 'Italian daily newspaper'
|
description = 'Italian daily newspaper'
|
||||||
|
|
||||||
cover_url = 'http://www.apcom.net/img/logoAP.gif'
|
cover_url = 'http://www.apcom.net/img/logoAP.gif'
|
||||||
title = u'Apcom'
|
title = u'Apcom'
|
||||||
publisher = 'TM News S.p.A.'
|
publisher = 'TM News S.p.A.'
|
||||||
category = 'News, politics, culture, economy, general interest'
|
category = 'News, politics, culture, economy, general interest'
|
||||||
|
|
||||||
language = 'it'
|
language = 'it'
|
||||||
timefmt = '[%a, %d %b, %Y]'
|
timefmt = '[%a, %d %b, %Y]'
|
||||||
|
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 50
|
max_articles_per_feed = 50
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
recursion = 100
|
recursion = 100
|
||||||
|
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
conversion_options = {'linearize_tables':True}
|
conversion_options = {'linearize_tables': True}
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'id':'ag_center'})
|
dict(name='div', attrs={'id': 'ag_center'})
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Globale', u'http://www.apcom.net/rss/globale.xml '),
|
(u'Globale', u'http://www.apcom.net/rss/globale.xml '),
|
||||||
(u'Politica', u'http://www.apcom.net/rss/politica.xml'),
|
(u'Politica', u'http://www.apcom.net/rss/politica.xml'),
|
||||||
(u'Cronaca', u'http://www.apcom.net/rss/cronaca.xml'),
|
(u'Cronaca', u'http://www.apcom.net/rss/cronaca.xml'),
|
||||||
(u'Econimia', u'http://www.apcom.net/rss/economia.xml'),
|
(u'Econimia', u'http://www.apcom.net/rss/economia.xml'),
|
||||||
(u'Esteri', u'http://www.apcom.net/rss/esteri.xml'),
|
(u'Esteri', u'http://www.apcom.net/rss/esteri.xml'),
|
||||||
(u'Cultura', u'http://www.apcom.net/rss/cultura.xml'),
|
(u'Cultura', u'http://www.apcom.net/rss/cultura.xml'),
|
||||||
(u'Sport', u'http://www.apcom.net/rss/sport.xml')
|
(u'Sport', u'http://www.apcom.net/rss/sport.xml')
|
||||||
]
|
]
|
||||||
|
@ -1,28 +1,30 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class APOD(BasicNewsRecipe):
|
class APOD(BasicNewsRecipe):
|
||||||
title = u'Astronomy Picture of the Day'
|
title = u'Astronomy Picture of the Day'
|
||||||
__author__ = 'Starson17'
|
__author__ = 'Starson17'
|
||||||
description = 'Astronomy Pictures'
|
description = 'Astronomy Pictures'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
cover_url = 'http://apod.nasa.gov/apod/image/1003/m78_torregrosa.jpg'
|
cover_url = 'http://apod.nasa.gov/apod/image/1003/m78_torregrosa.jpg'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
recursions = 0
|
recursions = 0
|
||||||
oldest_article = 14
|
oldest_article = 14
|
||||||
remove_attributes = ['onmouseover', 'onmouseout']
|
remove_attributes = ['onmouseover', 'onmouseout']
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Astronomy Picture of the Day', u'http://apod.nasa.gov/apod.rss')
|
(u'Astronomy Picture of the Day', u'http://apod.nasa.gov/apod.rss')
|
||||||
]
|
]
|
||||||
|
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
|
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
|
||||||
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
|
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
|
||||||
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
||||||
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
|
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
|
||||||
'''
|
'''
|
||||||
|
|
||||||
def postprocess_html(self, soup, first_fetch):
|
def postprocess_html(self, soup, first_fetch):
|
||||||
center_tags = soup.findAll(['center'])
|
center_tags = soup.findAll(['center'])
|
||||||
p_tags = soup.findAll(['p'])
|
p_tags = soup.findAll(['p'])
|
||||||
@ -35,4 +37,3 @@ class APOD(BasicNewsRecipe):
|
|||||||
for tag in last2_p:
|
for tag in last2_p:
|
||||||
tag.extract()
|
tag.extract()
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
@ -9,18 +9,19 @@ appfunds.blogspot.com
|
|||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class app_funds(BasicNewsRecipe):
|
class app_funds(BasicNewsRecipe):
|
||||||
title = u'APP Funds'
|
title = u'APP Funds'
|
||||||
__author__ = 'teepel <teepel44@gmail.com>'
|
__author__ = 'teepel <teepel44@gmail.com>'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
description ='Blog inwestora dla inwestorów i oszczędzających'
|
description = 'Blog inwestora dla inwestorów i oszczędzających'
|
||||||
INDEX='http://appfunds.blogspot.com'
|
INDEX = 'http://appfunds.blogspot.com'
|
||||||
remove_empty_feeds= True
|
remove_empty_feeds = True
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
simultaneous_downloads = 5
|
simultaneous_downloads = 5
|
||||||
remove_javascript=True
|
remove_javascript = True
|
||||||
no_stylesheets=True
|
no_stylesheets = True
|
||||||
auto_cleanup = True
|
auto_cleanup = True
|
||||||
|
|
||||||
feeds = [(u'blog', u'http://feeds.feedburner.com/blogspot/etVI')]
|
feeds = [(u'blog', u'http://feeds.feedburner.com/blogspot/etVI')]
|
||||||
|
@ -1,12 +1,14 @@
|
|||||||
# vim:fileencoding=UTF-8
|
# vim:fileencoding=UTF-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2013-2015, Eddie Lau'
|
__copyright__ = '2013-2015, Eddie Lau'
|
||||||
__Date__ = ''
|
__Date__ = ''
|
||||||
|
|
||||||
from calibre import (__appname__, force_unicode, strftime)
|
from calibre import (__appname__, force_unicode, strftime)
|
||||||
from calibre.utils.date import now as nowf
|
from calibre.utils.date import now as nowf
|
||||||
import os, datetime, re
|
import os
|
||||||
|
import datetime
|
||||||
|
import re
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
from contextlib import nested
|
from contextlib import nested
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
@ -15,10 +17,11 @@ from calibre.ebooks.metadata.toc import TOC
|
|||||||
from calibre.ebooks.metadata import MetaInformation
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
from calibre.utils.localization import canonicalize_lang
|
from calibre.utils.localization import canonicalize_lang
|
||||||
|
|
||||||
|
|
||||||
class AppleDaily(BasicNewsRecipe):
|
class AppleDaily(BasicNewsRecipe):
|
||||||
title = u'蘋果日報 (香港)'
|
title = u'蘋果日報 (香港)'
|
||||||
__author__ = 'Eddie Lau'
|
__author__ = 'Eddie Lau'
|
||||||
publisher = '蘋果日報'
|
publisher = '蘋果日報'
|
||||||
oldest_article = 1
|
oldest_article = 1
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
auto_cleanup = False
|
auto_cleanup = False
|
||||||
@ -26,48 +29,48 @@ class AppleDaily(BasicNewsRecipe):
|
|||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
auto_cleanup = False
|
auto_cleanup = False
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
description = 'http://hkm.appledaily.com/'
|
description = 'http://hkm.appledaily.com/'
|
||||||
category = 'Chinese, News, Hong Kong'
|
category = 'Chinese, News, Hong Kong'
|
||||||
masthead_url = 'http://upload.wikimedia.org/wikipedia/zh/c/cf/AppleDailyLogo1.png'
|
masthead_url = 'http://upload.wikimedia.org/wikipedia/zh/c/cf/AppleDailyLogo1.png'
|
||||||
|
|
||||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} h1 {font-size:200%; text-align:left; font-weight:bold;} p[class=video-caption] {font-size:50%; margin-left:auto; margin-right:auto;}'
|
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} h1 {font-size:200%; text-align:left; font-weight:bold;} p[class=video-caption] {font-size:50%; margin-left:auto; margin-right:auto;}' # noqa
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'content-article'})]
|
keep_only_tags = [dict(name='div', attrs={'id': 'content-article'})]
|
||||||
remove_tags = [dict(name='div', attrs={'class':'prev-next-btn'}),
|
remove_tags = [dict(name='div', attrs={'class': 'prev-next-btn'}),
|
||||||
dict(name='p', attrs={'class':'next'})]
|
dict(name='p', attrs={'class': 'next'})]
|
||||||
|
|
||||||
def get_dtlocal(self):
|
def get_dtlocal(self):
|
||||||
dt_utc = datetime.datetime.utcnow()
|
dt_utc = datetime.datetime.utcnow()
|
||||||
# convert UTC to local hk time - at HKT 6am, all news are available
|
# convert UTC to local hk time - at HKT 6am, all news are available
|
||||||
return dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(6.0/24)
|
return dt_utc + datetime.timedelta(8.0 / 24) - datetime.timedelta(6.0 / 24)
|
||||||
|
|
||||||
def get_fetchdate(self):
|
def get_fetchdate(self):
|
||||||
if __Date__ <> '':
|
if __Date__ != '':
|
||||||
return __Date__
|
return __Date__
|
||||||
else:
|
else:
|
||||||
return self.get_dtlocal().strftime("%Y%m%d")
|
return self.get_dtlocal().strftime("%Y%m%d")
|
||||||
|
|
||||||
def get_fetchformatteddate(self):
|
def get_fetchformatteddate(self):
|
||||||
if __Date__ <> '':
|
if __Date__ != '':
|
||||||
return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
|
return __Date__[0:4] + '-' + __Date__[4:6] + '-' + __Date__[6:8]
|
||||||
else:
|
else:
|
||||||
return self.get_dtlocal().strftime("%Y-%m-%d")
|
return self.get_dtlocal().strftime("%Y-%m-%d")
|
||||||
|
|
||||||
def get_fetchyear(self):
|
def get_fetchyear(self):
|
||||||
if __Date__ <> '':
|
if __Date__ != '':
|
||||||
return __Date__[0:4]
|
return __Date__[0:4]
|
||||||
else:
|
else:
|
||||||
return self.get_dtlocal().strftime("%Y")
|
return self.get_dtlocal().strftime("%Y")
|
||||||
|
|
||||||
def get_fetchmonth(self):
|
def get_fetchmonth(self):
|
||||||
if __Date__ <> '':
|
if __Date__ != '':
|
||||||
return __Date__[4:6]
|
return __Date__[4:6]
|
||||||
else:
|
else:
|
||||||
return self.get_dtlocal().strftime("%m")
|
return self.get_dtlocal().strftime("%m")
|
||||||
|
|
||||||
def get_fetchday(self):
|
def get_fetchday(self):
|
||||||
if __Date__ <> '':
|
if __Date__ != '':
|
||||||
return __Date__[6:8]
|
return __Date__[6:8]
|
||||||
else:
|
else:
|
||||||
return self.get_dtlocal().strftime("%d")
|
return self.get_dtlocal().strftime("%d")
|
||||||
@ -78,7 +81,7 @@ class AppleDaily(BasicNewsRecipe):
|
|||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
soup = self.index_to_soup('http://hkm.appledaily.com/')
|
soup = self.index_to_soup('http://hkm.appledaily.com/')
|
||||||
cover = soup.find(attrs={'class':'top-news'}).get('src', False)
|
cover = soup.find(attrs={'class': 'top-news'}).get('src', False)
|
||||||
br = BasicNewsRecipe.get_browser(self)
|
br = BasicNewsRecipe.get_browser(self)
|
||||||
try:
|
try:
|
||||||
br.open(cover)
|
br.open(cover)
|
||||||
@ -90,12 +93,12 @@ class AppleDaily(BasicNewsRecipe):
|
|||||||
if first and hasattr(self, 'add_toc_thumbnail'):
|
if first and hasattr(self, 'add_toc_thumbnail'):
|
||||||
picdiv = soup.find('img')
|
picdiv = soup.find('img')
|
||||||
if picdiv is not None:
|
if picdiv is not None:
|
||||||
self.add_toc_thumbnail(article,picdiv['src'])
|
self.add_toc_thumbnail(article, picdiv['src'])
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
feeds = []
|
feeds = []
|
||||||
soup = self.index_to_soup('http://hkm.appledaily.com/')
|
soup = self.index_to_soup('http://hkm.appledaily.com/')
|
||||||
ul = soup.find(attrs={'class':'menu'})
|
ul = soup.find(attrs={'class': 'menu'})
|
||||||
sectionList = []
|
sectionList = []
|
||||||
for li in ul.findAll('li'):
|
for li in ul.findAll('li'):
|
||||||
relativea = li.find('a', href=True).get('href', False)
|
relativea = li.find('a', href=True).get('href', False)
|
||||||
@ -111,13 +114,14 @@ class AppleDaily(BasicNewsRecipe):
|
|||||||
|
|
||||||
def parse_section(self, url):
|
def parse_section(self, url):
|
||||||
soup = self.index_to_soup(url)
|
soup = self.index_to_soup(url)
|
||||||
ul = soup.find(attrs={'class':'list'})
|
ul = soup.find(attrs={'class': 'list'})
|
||||||
current_articles = []
|
current_articles = []
|
||||||
for li in ul.findAll('li'):
|
for li in ul.findAll('li'):
|
||||||
a = li.find('a', href=True)
|
a = li.find('a', href=True)
|
||||||
title = li.find('p', text=True).strip()
|
title = li.find('p', text=True).strip()
|
||||||
if a is not None:
|
if a is not None:
|
||||||
current_articles.append({'title': title, 'url':'http://hkm.appledaily.com/' + a.get('href', False)})
|
current_articles.append(
|
||||||
|
{'title': title, 'url': 'http://hkm.appledaily.com/' + a.get('href', False)})
|
||||||
pass
|
pass
|
||||||
return current_articles
|
return current_articles
|
||||||
|
|
||||||
@ -131,7 +135,8 @@ class AppleDaily(BasicNewsRecipe):
|
|||||||
mi.publisher = __appname__
|
mi.publisher = __appname__
|
||||||
mi.author_sort = __appname__
|
mi.author_sort = __appname__
|
||||||
if self.publication_type:
|
if self.publication_type:
|
||||||
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
|
mi.publication_type = 'periodical:' + \
|
||||||
|
self.publication_type + ':' + self.short_title()
|
||||||
mi.timestamp = nowf()
|
mi.timestamp = nowf()
|
||||||
article_titles, aseen = [], set()
|
article_titles, aseen = [], set()
|
||||||
for f in feeds:
|
for f in feeds:
|
||||||
@ -144,15 +149,16 @@ class AppleDaily(BasicNewsRecipe):
|
|||||||
if not isinstance(mi.comments, unicode):
|
if not isinstance(mi.comments, unicode):
|
||||||
mi.comments = mi.comments.decode('utf-8', 'replace')
|
mi.comments = mi.comments.decode('utf-8', 'replace')
|
||||||
mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
|
mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
|
||||||
'\n\n'.join(article_titles))
|
'\n\n'.join(article_titles))
|
||||||
|
|
||||||
language = canonicalize_lang(self.language)
|
language = canonicalize_lang(self.language)
|
||||||
if language is not None:
|
if language is not None:
|
||||||
mi.language = language
|
mi.language = language
|
||||||
# This one affects the pub date shown in kindle title
|
# This one affects the pub date shown in kindle title
|
||||||
#mi.pubdate = nowf()
|
# mi.pubdate = nowf()
|
||||||
# now appears to need the time field to be > 12.00noon as well
|
# now appears to need the time field to be > 12.00noon as well
|
||||||
mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
|
mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(
|
||||||
|
self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
|
||||||
opf_path = os.path.join(dir, 'index.opf')
|
opf_path = os.path.join(dir, 'index.opf')
|
||||||
ncx_path = os.path.join(dir, 'index.ncx')
|
ncx_path = os.path.join(dir, 'index.ncx')
|
||||||
|
|
||||||
@ -161,12 +167,14 @@ class AppleDaily(BasicNewsRecipe):
|
|||||||
mp = getattr(self, 'masthead_path', None)
|
mp = getattr(self, 'masthead_path', None)
|
||||||
if mp is not None and os.access(mp, os.R_OK):
|
if mp is not None and os.access(mp, os.R_OK):
|
||||||
from calibre.ebooks.metadata.opf2 import Guide
|
from calibre.ebooks.metadata.opf2 import Guide
|
||||||
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
|
ref = Guide.Reference(os.path.basename(
|
||||||
|
self.masthead_path), os.getcwdu())
|
||||||
ref.type = 'masthead'
|
ref.type = 'masthead'
|
||||||
ref.title = 'Masthead Image'
|
ref.title = 'Masthead Image'
|
||||||
opf.guide.append(ref)
|
opf.guide.append(ref)
|
||||||
|
|
||||||
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
|
manifest = [os.path.join(dir, 'feed_%d' % i)
|
||||||
|
for i in range(len(feeds))]
|
||||||
manifest.append(os.path.join(dir, 'index.html'))
|
manifest.append(os.path.join(dir, 'index.html'))
|
||||||
manifest.append(os.path.join(dir, 'index.ncx'))
|
manifest.append(os.path.join(dir, 'index.ncx'))
|
||||||
|
|
||||||
@ -175,7 +183,7 @@ class AppleDaily(BasicNewsRecipe):
|
|||||||
if cpath is None:
|
if cpath is None:
|
||||||
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
|
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
|
||||||
if self.default_cover(pf):
|
if self.default_cover(pf):
|
||||||
cpath = pf.name
|
cpath = pf.name
|
||||||
if cpath is not None and os.access(cpath, os.R_OK):
|
if cpath is not None and os.access(cpath, os.R_OK):
|
||||||
opf.cover = cpath
|
opf.cover = cpath
|
||||||
manifest.append(cpath)
|
manifest.append(cpath)
|
||||||
@ -197,12 +205,11 @@ class AppleDaily(BasicNewsRecipe):
|
|||||||
self.play_order_counter = 0
|
self.play_order_counter = 0
|
||||||
self.play_order_map = {}
|
self.play_order_map = {}
|
||||||
|
|
||||||
|
|
||||||
def feed_index(num, parent):
|
def feed_index(num, parent):
|
||||||
f = feeds[num]
|
f = feeds[num]
|
||||||
for j, a in enumerate(f):
|
for j, a in enumerate(f):
|
||||||
if getattr(a, 'downloaded', False):
|
if getattr(a, 'downloaded', False):
|
||||||
adir = 'feed_%d/article_%d/'%(num, j)
|
adir = 'feed_%d/article_%d/' % (num, j)
|
||||||
auth = a.author
|
auth = a.author
|
||||||
if not auth:
|
if not auth:
|
||||||
auth = None
|
auth = None
|
||||||
@ -212,16 +219,18 @@ class AppleDaily(BasicNewsRecipe):
|
|||||||
else:
|
else:
|
||||||
desc = self.description_limiter(desc)
|
desc = self.description_limiter(desc)
|
||||||
tt = a.toc_thumbnail if a.toc_thumbnail else None
|
tt = a.toc_thumbnail if a.toc_thumbnail else None
|
||||||
entries.append('%sindex.html'%adir)
|
entries.append('%sindex.html' % adir)
|
||||||
po = self.play_order_map.get(entries[-1], None)
|
po = self.play_order_map.get(entries[-1], None)
|
||||||
if po is None:
|
if po is None:
|
||||||
self.play_order_counter += 1
|
self.play_order_counter += 1
|
||||||
po = self.play_order_counter
|
po = self.play_order_counter
|
||||||
parent.add_item('%sindex.html'%adir, None,
|
parent.add_item('%sindex.html' % adir, None,
|
||||||
a.title if a.title else _('Untitled Article'),
|
a.title if a.title else _(
|
||||||
play_order=po, author=auth,
|
'Untitled Article'),
|
||||||
description=desc, toc_thumbnail=tt)
|
play_order=po, author=auth,
|
||||||
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
|
description=desc, toc_thumbnail=tt)
|
||||||
|
last = os.path.join(
|
||||||
|
self.output_dir, ('%sindex.html' % adir).replace('/', os.sep))
|
||||||
for sp in a.sub_pages:
|
for sp in a.sub_pages:
|
||||||
prefix = os.path.commonprefix([opf_path, sp])
|
prefix = os.path.commonprefix([opf_path, sp])
|
||||||
relp = sp[len(prefix):]
|
relp = sp[len(prefix):]
|
||||||
@ -234,12 +243,14 @@ class AppleDaily(BasicNewsRecipe):
|
|||||||
soup = BeautifulSoup(src)
|
soup = BeautifulSoup(src)
|
||||||
body = soup.find('body')
|
body = soup.find('body')
|
||||||
if body is not None:
|
if body is not None:
|
||||||
prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
|
prefix = '/'.join('..'for i in range(2 *
|
||||||
|
len(re.findall(r'link\d+', last))))
|
||||||
templ = self.navbar.generate(True, num, j, len(f),
|
templ = self.navbar.generate(True, num, j, len(f),
|
||||||
not self.has_single_feed,
|
not self.has_single_feed,
|
||||||
a.orig_url, __appname__, prefix=prefix,
|
a.orig_url, __appname__, prefix=prefix,
|
||||||
center=self.center_navbar)
|
center=self.center_navbar)
|
||||||
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
|
elem = BeautifulSoup(templ.render(
|
||||||
|
doctype='xhtml').decode('utf-8')).find('div')
|
||||||
body.insert(len(body.contents), elem)
|
body.insert(len(body.contents), elem)
|
||||||
with open(last, 'wb') as fi:
|
with open(last, 'wb') as fi:
|
||||||
fi.write(unicode(soup).encode('utf-8'))
|
fi.write(unicode(soup).encode('utf-8'))
|
||||||
@ -248,7 +259,7 @@ class AppleDaily(BasicNewsRecipe):
|
|||||||
|
|
||||||
if len(feeds) > 1:
|
if len(feeds) > 1:
|
||||||
for i, f in enumerate(feeds):
|
for i, f in enumerate(feeds):
|
||||||
entries.append('feed_%d/index.html'%i)
|
entries.append('feed_%d/index.html' % i)
|
||||||
po = self.play_order_map.get(entries[-1], None)
|
po = self.play_order_map.get(entries[-1], None)
|
||||||
if po is None:
|
if po is None:
|
||||||
self.play_order_counter += 1
|
self.play_order_counter += 1
|
||||||
@ -259,11 +270,11 @@ class AppleDaily(BasicNewsRecipe):
|
|||||||
desc = getattr(f, 'description', None)
|
desc = getattr(f, 'description', None)
|
||||||
if not desc:
|
if not desc:
|
||||||
desc = None
|
desc = None
|
||||||
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
|
feed_index(i, toc.add_item('feed_%d/index.html' % i, None,
|
||||||
f.title, play_order=po, description=desc, author=auth))
|
f.title, play_order=po, description=desc, author=auth))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
entries.append('feed_%d/index.html'%0)
|
entries.append('feed_%d/index.html' % 0)
|
||||||
feed_index(0, toc)
|
feed_index(0, toc)
|
||||||
|
|
||||||
for i, p in enumerate(entries):
|
for i, p in enumerate(entries):
|
||||||
@ -273,5 +284,3 @@ class AppleDaily(BasicNewsRecipe):
|
|||||||
|
|
||||||
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
|
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
|
||||||
opf.render(opf_file, ncx_file)
|
opf.render(opf_file, ncx_file)
|
||||||
|
|
||||||
|
|
||||||
|
@ -34,12 +34,12 @@ class AppledailyTW(BasicNewsRecipe):
|
|||||||
{'name': 'hr'}
|
{'name': 'hr'}
|
||||||
]
|
]
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'title' : title,
|
'title': title,
|
||||||
'comments' : description,
|
'comments': description,
|
||||||
'tags' : category,
|
'tags': category,
|
||||||
'language' : language,
|
'language': language,
|
||||||
'publisher' : publisher,
|
'publisher': publisher,
|
||||||
'authors' : publisher,
|
'authors': publisher,
|
||||||
'linearize_tables': True
|
'linearize_tables': True
|
||||||
}
|
}
|
||||||
feeds = [
|
feeds = [
|
||||||
@ -105,5 +105,6 @@ class AppledailyTW(BasicNewsRecipe):
|
|||||||
|
|
||||||
def preprocess_raw_html(self, raw_html, url):
|
def preprocess_raw_html(self, raw_html, url):
|
||||||
raw_html = re.sub(ur'<a href=".*?<br><br>.*?<\/a>', '', raw_html)
|
raw_html = re.sub(ur'<a href=".*?<br><br>.*?<\/a>', '', raw_html)
|
||||||
raw_html = re.sub(ur'<title>(.*?)[\s]+\|.*<\/title>', '<title>\1<\/title>', raw_html)
|
raw_html = re.sub(
|
||||||
|
ur'<title>(.*?)[\s]+\|.*<\/title>', '<title>\1<\/title>', raw_html)
|
||||||
return raw_html
|
return raw_html
|
||||||
|
@ -2,21 +2,22 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class BasicUserRecipe1395137685(BasicNewsRecipe):
|
class BasicUserRecipe1395137685(BasicNewsRecipe):
|
||||||
title = u'Applefobia'
|
title = u'Applefobia'
|
||||||
__author__ = 'koliberek'
|
__author__ = 'koliberek'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
auto_cleanup = True
|
auto_cleanup = True
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'tags' : u'newsy, Apple, humor',
|
'tags': u'newsy, Apple, humor',
|
||||||
'smarten_punctuation' : True,
|
'smarten_punctuation': True,
|
||||||
'authors' : 'Ogrodnik January',
|
'authors': 'Ogrodnik January',
|
||||||
'publisher' : 'Blogspot.pl'
|
'publisher': 'Blogspot.pl'
|
||||||
}
|
}
|
||||||
reverse_article_order = True
|
reverse_article_order = True
|
||||||
|
|
||||||
feeds = [(u'Aktualne', u'http://applefobia.blogspot.com/feeds/posts/default')]
|
feeds = [(u'Aktualne', u'http://applefobia.blogspot.com/feeds/posts/default')]
|
||||||
|
@ -1,22 +1,20 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class AmericanProspect(BasicNewsRecipe):
|
class AmericanProspect(BasicNewsRecipe):
|
||||||
title = u'American Prospect'
|
title = u'American Prospect'
|
||||||
__author__ = u'Michael Heinz, a.peter'
|
__author__ = u'Michael Heinz, a.peter'
|
||||||
version = 2
|
version = 2
|
||||||
|
|
||||||
oldest_article = 30
|
oldest_article = 30
|
||||||
language = 'en'
|
language = 'en'
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
recursions = 0
|
recursions = 0
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
|
|
||||||
#keep_only_tags = [dict(name='div', attrs={'class':'pad_10L10R'})]
|
|
||||||
#remove_tags = [dict(name='form'), dict(name='div', attrs={'class':['bkt_caption','sharebox noprint','badgebox']})]
|
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
auto_cleanup = True
|
auto_cleanup = True
|
||||||
feeds = [(u'Articles', u'feed://www.prospect.org/articles_rss.jsp')]
|
feeds = [(u'Articles', u'feed://www.prospect.org/articles_rss.jsp')]
|
||||||
|
|
||||||
|
@ -1,19 +1,25 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = 'Ruben Pollan <meskio@sindominio.net>'
|
__copyright__ = 'Ruben Pollan <meskio@sindominio.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1335656316(BasicNewsRecipe):
|
class AdvancedUserRecipe1335656316(BasicNewsRecipe):
|
||||||
title = u'AraInfo.org'
|
title = u'AraInfo.org'
|
||||||
__author__ = 'Ruben Pollan'
|
__author__ = 'Ruben Pollan'
|
||||||
description = 'Regional newspaper from Aragon'
|
description = 'Regional newspaper from Aragon'
|
||||||
language = 'es'
|
language = 'es'
|
||||||
|
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
auto_cleanup = True
|
auto_cleanup = True
|
||||||
cover_url = u'http://arainfo.org/wordpress/wp-content/uploads/2011/10/logo-web_alta.jpg'
|
cover_url = u'http://arainfo.org/wordpress/wp-content/uploads/2011/10/logo-web_alta.jpg'
|
||||||
|
|
||||||
feeds = [(u'Movimientos', u'http://arainfo.org/category/movimientos/feed/'), (u'Econom\xeda', u'http://arainfo.org/category/economia/feed/'), (u'Ecolog\xeda', u'http://arainfo.org/category/ecologia/feed/'), (u'Culturas', u'http://arainfo.org/category/culturas/feed/'), (u'Altavoz', u'http://arainfo.org/category/altavoz/feed/')]
|
feeds = [
|
||||||
|
(u'Movimientos', u'http://arainfo.org/category/movimientos/feed/'),
|
||||||
|
(u'Econom\xeda', u'http://arainfo.org/category/economia/feed/'),
|
||||||
|
(u'Ecolog\xeda', u'http://arainfo.org/category/ecologia/feed/'),
|
||||||
|
(u'Culturas', u'http://arainfo.org/category/culturas/feed/'),
|
||||||
|
(u'Altavoz', u'http://arainfo.org/category/altavoz/feed/')]
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
www.arabianbusiness.com
|
www.arabianbusiness.com
|
||||||
@ -6,21 +6,21 @@ www.arabianbusiness.com
|
|||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class Arabian_Business(BasicNewsRecipe):
|
class Arabian_Business(BasicNewsRecipe):
|
||||||
title = 'Arabian Business'
|
title = 'Arabian Business'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Comprehensive Guide to Middle East Business & Gulf Industry News including,Banking & Finance,Construction,Energy,Media & Marketing,Real Estate,Transportation,Travel,Technology,Politics,Healthcare,Lifestyle,Jobs & UAE guide.Top Gulf & Dubai Business News.'
|
description = 'Comprehensive Guide to Middle East Business & Gulf Industry News including,Banking & Finance,Construction,Energy,Media & Marketing,Real Estate,Transportation,Travel,Technology,Politics,Healthcare,Lifestyle,Jobs & UAE guide.Top Gulf & Dubai Business News.' # noqa
|
||||||
publisher = 'Arabian Business Publishing Ltd.'
|
publisher = 'Arabian Business Publishing Ltd.'
|
||||||
category = 'ArabianBusiness.com,Arab Business News,Middle East Business News,Middle East Business,Arab Media News,Industry Events,Middle East Industry News,Arab Business Industry,Dubai Business News,Financial News,UAE Business News,Middle East Press Releases,Gulf News,Arab News,GCC Business News,Banking Finance,Media Marketing,Construction,Oil Gas,Retail,Transportation,Travel Hospitality,Photos,Videos,Life Style,Fashion,United Arab Emirates,UAE,Dubai,Sharjah,Abu Dhabi,Qatar,KSA,Saudi Arabia,Bahrain,Kuwait,Oman,Europe,South Asia,America,Asia,news'
|
oldest_article = 2
|
||||||
oldest_article = 2
|
|
||||||
max_articles_per_feed = 200
|
max_articles_per_feed = 200
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'utf8'
|
encoding = 'utf8'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'en'
|
language = 'en'
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
publication_type = 'newsportal'
|
publication_type = 'newsportal'
|
||||||
masthead_url = 'http://www.arabianbusiness.com/skins/ab.main/gfx/arabianbusiness_logo_sm.gif'
|
masthead_url = 'http://www.arabianbusiness.com/skins/ab.main/gfx/arabianbusiness_logo_sm.gif'
|
||||||
extra_css = """
|
extra_css = """
|
||||||
body{font-family: Georgia,serif }
|
body{font-family: Georgia,serif }
|
||||||
img{margin-bottom: 0.4em; margin-top: 0.4em; display:block}
|
img{margin-bottom: 0.4em; margin-top: 0.4em; display:block}
|
||||||
@ -29,49 +29,46 @@ class Arabian_Business(BasicNewsRecipe):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment': description, 'publisher': publisher, 'language': language
|
||||||
, 'tags' : category
|
}
|
||||||
, 'publisher' : publisher
|
|
||||||
, 'language' : language
|
|
||||||
}
|
|
||||||
|
|
||||||
remove_tags_before=dict(attrs={'id':'article-title'})
|
remove_tags_before = dict(attrs={'id': 'article-title'})
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['meta','link','base','iframe','embed','object'])
|
dict(name=['meta', 'link', 'base', 'iframe', 'embed', 'object']), dict(
|
||||||
,dict(attrs={'class':'printfooter'})
|
attrs={'class': 'printfooter'})
|
||||||
]
|
]
|
||||||
remove_attributes=['lang']
|
remove_attributes = ['lang']
|
||||||
|
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Africa' , u'http://www.arabianbusiness.com/world/Africa/?service=rss' )
|
|
||||||
,(u'Americas' , u'http://www.arabianbusiness.com/world/americas/?service=rss' )
|
(u'Africa', u'http://www.arabianbusiness.com/world/Africa/?service=rss'),
|
||||||
,(u'Asia Pacific' , u'http://www.arabianbusiness.com/world/asia-pacific/?service=rss' )
|
(u'Americas', u'http://www.arabianbusiness.com/world/americas/?service=rss'),
|
||||||
,(u'Europe' , u'http://www.arabianbusiness.com/world/europe/?service=rss' )
|
(u'Asia Pacific', u'http://www.arabianbusiness.com/world/asia-pacific/?service=rss'),
|
||||||
,(u'Middle East' , u'http://www.arabianbusiness.com/world/middle-east/?service=rss' )
|
(u'Europe', u'http://www.arabianbusiness.com/world/europe/?service=rss'),
|
||||||
,(u'South Asia' , u'http://www.arabianbusiness.com/world/south-asia/?service=rss' )
|
(u'Middle East', u'http://www.arabianbusiness.com/world/middle-east/?service=rss'),
|
||||||
,(u'Banking & Finance', u'http://www.arabianbusiness.com/industries/banking-finance/?service=rss' )
|
(u'South Asia', u'http://www.arabianbusiness.com/world/south-asia/?service=rss'),
|
||||||
,(u'Construction' , u'http://www.arabianbusiness.com/industries/construction/?service=rss' )
|
(u'Banking & Finance', u'http://www.arabianbusiness.com/industries/banking-finance/?service=rss'),
|
||||||
,(u'Education' , u'http://www.arabianbusiness.com/industries/education/?service=rss' )
|
(u'Construction', u'http://www.arabianbusiness.com/industries/construction/?service=rss'),
|
||||||
,(u'Energy' , u'http://www.arabianbusiness.com/industries/energy/?service=rss' )
|
(u'Education', u'http://www.arabianbusiness.com/industries/education/?service=rss'),
|
||||||
,(u'Healthcare' , u'http://www.arabianbusiness.com/industries/healthcare/?service=rss' )
|
(u'Energy', u'http://www.arabianbusiness.com/industries/energy/?service=rss'),
|
||||||
,(u'Media' , u'http://www.arabianbusiness.com/industries/media/?service=rss' )
|
(u'Healthcare', u'http://www.arabianbusiness.com/industries/healthcare/?service=rss'),
|
||||||
,(u'Real Estate' , u'http://www.arabianbusiness.com/industries/real-estate/?service=rss' )
|
(u'Media', u'http://www.arabianbusiness.com/industries/media/?service=rss'),
|
||||||
,(u'Retail' , u'http://www.arabianbusiness.com/industries/retail/?service=rss' )
|
(u'Real Estate', u'http://www.arabianbusiness.com/industries/real-estate/?service=rss'),
|
||||||
,(u'Technology' , u'http://www.arabianbusiness.com/industries/technology/?service=rss' )
|
(u'Retail', u'http://www.arabianbusiness.com/industries/retail/?service=rss'),
|
||||||
,(u'Transport' , u'http://www.arabianbusiness.com/industries/transport/?service=rss' )
|
(u'Technology', u'http://www.arabianbusiness.com/industries/technology/?service=rss'),
|
||||||
,(u'Travel' , u'http://www.arabianbusiness.com/industries/travel-hospitality/?service=rss')
|
(u'Transport', u'http://www.arabianbusiness.com/industries/transport/?service=rss'),
|
||||||
,(u'Equities' , u'http://www.arabianbusiness.com/markets/equities/?service=rss' )
|
(u'Travel', u'http://www.arabianbusiness.com/industries/travel-hospitality/?service=rss'),
|
||||||
,(u'Commodities' , u'http://www.arabianbusiness.com/markets/commodities/?service=rss' )
|
(u'Equities', u'http://www.arabianbusiness.com/markets/equities/?service=rss'),
|
||||||
,(u'Currencies' , u'http://www.arabianbusiness.com/markets/currencies/?service=rss' )
|
(u'Commodities', u'http://www.arabianbusiness.com/markets/commodities/?service=rss'),
|
||||||
,(u'Market Data' , u'http://www.arabianbusiness.com/markets/market-data/?service=rss' )
|
(u'Currencies', u'http://www.arabianbusiness.com/markets/currencies/?service=rss'),
|
||||||
,(u'Comment' , u'http://www.arabianbusiness.com/opinion/comment/?service=rss' )
|
(u'Market Data', u'http://www.arabianbusiness.com/markets/market-data/?service=rss'),
|
||||||
,(u'Think Tank' , u'http://www.arabianbusiness.com/opinion/think-tank/?service=rss' )
|
(u'Comment', u'http://www.arabianbusiness.com/opinion/comment/?service=rss'),
|
||||||
,(u'Arts' , u'http://www.arabianbusiness.com/lifestyle/arts/?service=rss' )
|
(u'Think Tank', u'http://www.arabianbusiness.com/opinion/think-tank/?service=rss'),
|
||||||
,(u'Cars' , u'http://www.arabianbusiness.com/lifestyle/cars/?service=rss' )
|
(u'Arts', u'http://www.arabianbusiness.com/lifestyle/arts/?service=rss'),
|
||||||
,(u'Food' , u'http://www.arabianbusiness.com/lifestyle/food/?service=rss' )
|
(u'Cars', u'http://www.arabianbusiness.com/lifestyle/cars/?service=rss'),
|
||||||
,(u'Sport' , u'http://www.arabianbusiness.com/lifestyle/sport/?service=rss' )
|
(u'Food', u'http://www.arabianbusiness.com/lifestyle/food/?service=rss'),
|
||||||
]
|
(u'Sport', u'http://www.arabianbusiness.com/lifestyle/sport/?service=rss')
|
||||||
|
]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url + '?service=printer&page='
|
return url + '?service=printer&page='
|
||||||
@ -81,6 +78,6 @@ class Arabian_Business(BasicNewsRecipe):
|
|||||||
del item['style']
|
del item['style']
|
||||||
for alink in soup.findAll('a'):
|
for alink in soup.findAll('a'):
|
||||||
if alink.string is not None:
|
if alink.string is not None:
|
||||||
tstr = alink.string
|
tstr = alink.string
|
||||||
alink.replaceWith(tstr)
|
alink.replaceWith(tstr)
|
||||||
return soup
|
return soup
|
||||||
|
@ -1,34 +1,32 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class Arbetaren_SE(BasicNewsRecipe):
|
class Arbetaren_SE(BasicNewsRecipe):
|
||||||
title = u'Arbetaren'
|
title = u'Arbetaren'
|
||||||
__author__ = 'Joakim Lindskog'
|
__author__ = 'Joakim Lindskog'
|
||||||
description = 'Nyheter fr\xc3\xa5n Arbetaren'
|
description = 'Nyheter fr\xc3\xa5n Arbetaren'
|
||||||
publisher = 'Arbetaren'
|
publisher = 'Arbetaren'
|
||||||
category = 'news, politics, socialism, Sweden'
|
category = 'news, politics, socialism, Sweden'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
delay = 1
|
delay = 1
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
language = 'sv'
|
language = 'sv'
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||||
, 'tags' : category
|
}
|
||||||
, 'publisher' : publisher
|
|
||||||
, 'language' : language
|
|
||||||
}
|
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'article'})]
|
keep_only_tags = [dict(name='div', attrs={'id': 'article'})]
|
||||||
remove_tags_before = dict(name='div', attrs={'id':'article'})
|
remove_tags_before = dict(name='div', attrs={'id': 'article'})
|
||||||
remove_tags_after = dict(name='p',attrs={'id':'byline'})
|
remove_tags_after = dict(name='p', attrs={'id': 'byline'})
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['object','link','base']),
|
dict(name=['object', 'link', 'base']),
|
||||||
dict(name='p', attrs={'class':'print'}),
|
dict(name='p', attrs={'class': 'print'}),
|
||||||
dict(name='a', attrs={'class':'addthis_button_compact'}),
|
dict(name='a', attrs={'class': 'addthis_button_compact'}),
|
||||||
dict(name='script')
|
dict(name='script')
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [(u'Nyheter', u'http://www.arbetaren.se/rss/arbetaren.rss?rev=123')]
|
feeds = [(u'Nyheter', u'http://www.arbetaren.se/rss/arbetaren.rss?rev=123')]
|
||||||
|
@ -3,16 +3,17 @@
|
|||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
|
||||||
class Arcadia_BBS(BasicNewsRecipe):
|
class Arcadia_BBS(BasicNewsRecipe):
|
||||||
title = u'Arcadia'
|
title = u'Arcadia'
|
||||||
__author__ = 'Masahiro Hasegawa'
|
__author__ = 'Masahiro Hasegawa'
|
||||||
language = 'ja'
|
language = 'ja'
|
||||||
encoding = 'utf8'
|
encoding = 'utf8'
|
||||||
filter_regexps = [r'ad\.jp\.ap\.valuecommerce.com',]
|
filter_regexps = [r'ad\.jp\.ap\.valuecommerce.com', ]
|
||||||
timefmt = '[%Y/%m/%d]'
|
timefmt = '[%Y/%m/%d]'
|
||||||
remove_tags_before = dict(name='a', attrs={'name':'kiji'})
|
remove_tags_before = dict(name='a', attrs={'name': 'kiji'})
|
||||||
|
|
||||||
sid_list = [] #some sotory id
|
sid_list = [] # some sotory id
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
result = []
|
result = []
|
||||||
@ -21,15 +22,12 @@ class Arcadia_BBS(BasicNewsRecipe):
|
|||||||
soup = self.index_to_soup(
|
soup = self.index_to_soup(
|
||||||
'http://www.mai-net.net/bbs/sst/sst.php?act=dump&all=%d'
|
'http://www.mai-net.net/bbs/sst/sst.php?act=dump&all=%d'
|
||||||
% sid)
|
% sid)
|
||||||
sec = soup.findAll('a', attrs={'href':re.compile(r'.*?kiji')})
|
sec = soup.findAll('a', attrs={'href': re.compile(r'.*?kiji')})
|
||||||
for s in sec[:-2]:
|
for s in sec[:-2]:
|
||||||
s_result.append(dict(title=s.string,
|
s_result.append(dict(title=s.string,
|
||||||
url="http://www.mai-net.net" + s['href'],
|
url="http://www.mai-net.net" + s['href'],
|
||||||
date=s.parent.parent.parent.findAll('td')[3].string[:-6],
|
date=s.parent.parent.parent.findAll('td')[
|
||||||
description='', content=''))
|
3].string[:-6],
|
||||||
|
description='', content=''))
|
||||||
result.append((s_result[0]['title'], s_result))
|
result.append((s_result[0]['title'], s_result))
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = 'Copyright 2010 Starson17'
|
__copyright__ = 'Copyright 2010 Starson17'
|
||||||
'''
|
'''
|
||||||
www.arcamax.com
|
www.arcamax.com
|
||||||
@ -10,30 +10,29 @@ import os
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ptempfile import PersistentTemporaryDirectory
|
from calibre.ptempfile import PersistentTemporaryDirectory
|
||||||
|
|
||||||
|
|
||||||
class Arcamax(BasicNewsRecipe):
|
class Arcamax(BasicNewsRecipe):
|
||||||
title = 'Arcamax'
|
title = 'Arcamax'
|
||||||
__author__ = 'Kovid Goyal'
|
__author__ = 'Kovid Goyal'
|
||||||
description = u'Family Friendly Comics - Customize for more days/comics: Defaults to 7 days, 25 comics - 20 general, 5 editorial.'
|
description = u'Family Friendly Comics - Customize for more days/comics: Defaults to 7 days, 25 comics - 20 general, 5 editorial.'
|
||||||
category = 'news, comics'
|
category = 'news, comics'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
use_embedded_content= False
|
use_embedded_content = False
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
cover_url = 'http://www.arcamax.com/images/pub/amuse/leftcol/zits.jpg'
|
cover_url = 'http://www.arcamax.com/images/pub/amuse/leftcol/zits.jpg'
|
||||||
|
|
||||||
# ###### USER PREFERENCES - SET COMICS AND NUMBER OF COMICS TO RETRIEVE ########
|
# ###### USER PREFERENCES - SET COMICS AND NUMBER OF COMICS TO RETRIEVE ##
|
||||||
num_comics_to_get = 7
|
num_comics_to_get = 7
|
||||||
# CHOOSE COMIC STRIPS BELOW - REMOVE COMMENT '# ' FROM IN FRONT OF DESIRED STRIPS
|
# CHOOSE COMIC STRIPS BELOW - REMOVE COMMENT '# ' FROM IN FRONT OF DESIRED
|
||||||
|
# STRIPS
|
||||||
|
|
||||||
conversion_options = {'linearize_tables' : True
|
conversion_options = {'linearize_tables': True, 'comment': description, 'tags': category, 'language': language
|
||||||
, 'comment' : description
|
}
|
||||||
, 'tags' : category
|
|
||||||
, 'language' : language
|
|
||||||
}
|
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='header', attrs={'class':'fn-content-header bluelabel'}),
|
dict(name='header', attrs={'class': 'fn-content-header bluelabel'}),
|
||||||
dict(name='figure', attrs={'class':['comic']}),
|
dict(name='figure', attrs={'class': ['comic']}),
|
||||||
]
|
]
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
@ -93,18 +92,22 @@ class Arcamax(BasicNewsRecipe):
|
|||||||
num -= 1
|
num -= 1
|
||||||
raw = self.index_to_soup(url, raw=True)
|
raw = self.index_to_soup(url, raw=True)
|
||||||
self.panel_counter += 1
|
self.panel_counter += 1
|
||||||
path = os.path.join(self.panel_tdir, '%d.html' % self.panel_counter)
|
path = os.path.join(self.panel_tdir, '%d.html' %
|
||||||
|
self.panel_counter)
|
||||||
with open(path, 'wb') as f:
|
with open(path, 'wb') as f:
|
||||||
f.write(raw)
|
f.write(raw)
|
||||||
soup = self.index_to_soup(raw)
|
soup = self.index_to_soup(raw)
|
||||||
a = soup.find(name='a', attrs={'class':['prev']})
|
a = soup.find(name='a', attrs={'class': ['prev']})
|
||||||
prev_page_url = 'http://www.arcamax.com' + a['href']
|
prev_page_url = 'http://www.arcamax.com' + a['href']
|
||||||
title = self.tag_to_string(soup.find('title')).partition('|')[0].strip()
|
title = self.tag_to_string(
|
||||||
|
soup.find('title')).partition('|')[0].strip()
|
||||||
if 'for' not in title.split():
|
if 'for' not in title.split():
|
||||||
title = title + ' for today'
|
title = title + ' for today'
|
||||||
date = self.tag_to_string(soup.find(name='span', attrs={'class':['cur']}))
|
date = self.tag_to_string(
|
||||||
|
soup.find(name='span', attrs={'class': ['cur']}))
|
||||||
self.log('\tFound:', title, 'at:', url)
|
self.log('\tFound:', title, 'at:', url)
|
||||||
current_articles.append({'title': title, 'url':'file://' + path , 'description':'', 'date': date})
|
current_articles.append(
|
||||||
|
{'title': title, 'url': 'file://' + path, 'description': '', 'date': date})
|
||||||
if self.test and len(current_articles) >= self.test[1]:
|
if self.test and len(current_articles) >= self.test[1]:
|
||||||
break
|
break
|
||||||
url = prev_page_url
|
url = prev_page_url
|
||||||
|
@ -1,33 +1,35 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class Archeowiesci(BasicNewsRecipe):
|
class Archeowiesci(BasicNewsRecipe):
|
||||||
title = u'Archeowieści'
|
title = u'Archeowieści'
|
||||||
__author__ = 'fenuks'
|
__author__ = 'fenuks'
|
||||||
category = 'archeology'
|
category = 'archeology'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
description = u'Z pasją o przeszłości'
|
description = u'Z pasją o przeszłości'
|
||||||
cover_url='http://archeowiesci.pl/wp-content/uploads/2011/05/Archeowiesci2-115x115.jpg'
|
cover_url = 'http://archeowiesci.pl/wp-content/uploads/2011/05/Archeowiesci2-115x115.jpg'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
needs_subscription='optional'
|
needs_subscription = 'optional'
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
auto_cleanup = True
|
auto_cleanup = True
|
||||||
remove_tags=[dict(name='span', attrs={'class':['post-ratings', 'post-ratings-loading']})]
|
remove_tags = [
|
||||||
feeds = [(u'Archeowieści', u'http://archeowiesci.pl/feed/')]
|
dict(name='span', attrs={'class': ['post-ratings', 'post-ratings-loading']})]
|
||||||
|
feeds = [(u'Archeowieści', u'http://archeowiesci.pl/feed/')]
|
||||||
|
|
||||||
def parse_feeds (self):
|
def parse_feeds(self):
|
||||||
feeds = BasicNewsRecipe.parse_feeds(self)
|
feeds = BasicNewsRecipe.parse_feeds(self)
|
||||||
for feed in feeds:
|
for feed in feeds:
|
||||||
for article in feed.articles[:]:
|
for article in feed.articles[:]:
|
||||||
if self.username is None and 'subskrypcja' in article.title:
|
if self.username is None and 'subskrypcja' in article.title:
|
||||||
feed.articles.remove(article)
|
feed.articles.remove(article)
|
||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
def get_browser(self):
|
def get_browser(self):
|
||||||
br = BasicNewsRecipe.get_browser(self)
|
br = BasicNewsRecipe.get_browser(self)
|
||||||
if self.username is not None and self.password is not None:
|
if self.username is not None and self.password is not None:
|
||||||
br.open('http://archeowiesci.pl/wp-login.php')
|
br.open('http://archeowiesci.pl/wp-login.php')
|
||||||
br.select_form(name='loginform')
|
br.select_form(name='loginform')
|
||||||
br['log'] = self.username
|
br['log'] = self.username
|
||||||
br['pwd'] = self.password
|
br['pwd'] = self.password
|
||||||
br.submit()
|
br.submit()
|
||||||
return br
|
return br
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2013, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2013, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
|
||||||
'''
|
'''
|
||||||
@ -10,87 +10,85 @@ import time
|
|||||||
from calibre import strftime
|
from calibre import strftime
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class ArgNoticias(BasicNewsRecipe):
|
class ArgNoticias(BasicNewsRecipe):
|
||||||
title = 'ARG Noticias'
|
title = 'ARG Noticias'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Ultimas noticias de Argentina'
|
description = 'Ultimas noticias de Argentina'
|
||||||
publisher = 'ARG Noticias'
|
publisher = 'ARG Noticias'
|
||||||
category = 'news, politics, Argentina'
|
category = 'news, politics, Argentina'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
masthead_url = 'http://www.argnoticias.com/images/arg-logo-footer.png'
|
masthead_url = 'http://www.argnoticias.com/images/arg-logo-footer.png'
|
||||||
language = 'es_AR'
|
language = 'es_AR'
|
||||||
publication_type = 'newsportal'
|
publication_type = 'newsportal'
|
||||||
INDEX = 'http://www.argnoticias.com'
|
INDEX = 'http://www.argnoticias.com'
|
||||||
extra_css = ''
|
extra_css = ''
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||||
, 'tags' : category
|
}
|
||||||
, 'publisher': publisher
|
|
||||||
, 'language' : language
|
|
||||||
}
|
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':['itemHeader','itemBody','itemAuthorBlock']})]
|
keep_only_tags = [
|
||||||
|
dict(name='div', attrs={'class': ['itemHeader', 'itemBody', 'itemAuthorBlock']})]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['object','link','base','iframe']),
|
dict(name=['object', 'link', 'base', 'iframe']),
|
||||||
dict(name='div', attrs={'class':['b2jsocial_parent','itemSocialSharing']})
|
dict(name='div', attrs={
|
||||||
]
|
'class': ['b2jsocial_parent', 'itemSocialSharing']})
|
||||||
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Politica' , u'http://www.argnoticias.com/index.php/politica' )
|
|
||||||
,(u'Economia' , u'http://www.argnoticias.com/index.php/economia' )
|
(u'Politica', u'http://www.argnoticias.com/index.php/politica'),
|
||||||
,(u'Sociedad' , u'http://www.argnoticias.com/index.php/sociedad' )
|
(u'Economia', u'http://www.argnoticias.com/index.php/economia'),
|
||||||
,(u'Mundo' , u'http://www.argnoticias.com/index.php/mundo' )
|
(u'Sociedad', u'http://www.argnoticias.com/index.php/sociedad'),
|
||||||
,(u'Deportes' , u'http://www.argnoticias.com/index.php/deportes' )
|
(u'Mundo', u'http://www.argnoticias.com/index.php/mundo'),
|
||||||
,(u'Espectaculos', u'http://www.argnoticias.com/index.php/espectaculos')
|
(u'Deportes', u'http://www.argnoticias.com/index.php/deportes'),
|
||||||
,(u'Tendencias' , u'http://www.argnoticias.com/index.php/tendencias' )
|
(u'Espectaculos', u'http://www.argnoticias.com/index.php/espectaculos'),
|
||||||
]
|
(u'Tendencias', u'http://www.argnoticias.com/index.php/tendencias')
|
||||||
|
]
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
totalfeeds = []
|
totalfeeds = []
|
||||||
lfeeds = self.get_feeds()
|
lfeeds = self.get_feeds()
|
||||||
checker = []
|
checker = []
|
||||||
for feedobj in lfeeds:
|
for feedobj in lfeeds:
|
||||||
feedtitle, feedurl = feedobj
|
feedtitle, feedurl = feedobj
|
||||||
self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
|
self.report_progress(0, _('Fetching feed') + ' %s...' %
|
||||||
|
(feedtitle if feedtitle else feedurl))
|
||||||
articles = []
|
articles = []
|
||||||
soup = self.index_to_soup(feedurl)
|
soup = self.index_to_soup(feedurl)
|
||||||
for item in soup.findAll('div', attrs={'class':'Nota'}):
|
for item in soup.findAll('div', attrs={'class': 'Nota'}):
|
||||||
atag = item.find('a', attrs={'class':'moduleItemTitle'})
|
atag = item.find('a', attrs={'class': 'moduleItemTitle'})
|
||||||
ptag = item.find('div', attrs={'class':'moduleItemIntrotext'})
|
ptag = item.find('div', attrs={'class': 'moduleItemIntrotext'})
|
||||||
url = self.INDEX + atag['href']
|
url = self.INDEX + atag['href']
|
||||||
title = self.tag_to_string(atag)
|
title = self.tag_to_string(atag)
|
||||||
description = self.tag_to_string(ptag)
|
description = self.tag_to_string(ptag)
|
||||||
date = strftime("%a, %d %b %Y %H:%M:%S +0000",time.gmtime())
|
date = strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())
|
||||||
if url not in checker:
|
if url not in checker:
|
||||||
checker.append(url)
|
checker.append(url)
|
||||||
articles.append({
|
articles.append({
|
||||||
'title' :title
|
'title': title, 'date': date, 'url': url, 'description': description
|
||||||
,'date' :date
|
})
|
||||||
,'url' :url
|
|
||||||
,'description':description
|
|
||||||
})
|
|
||||||
|
|
||||||
for item in soup.findAll('li'):
|
for item in soup.findAll('li'):
|
||||||
atag = item.find('a', attrs={'class':'moduleItemTitle'})
|
atag = item.find('a', attrs={'class': 'moduleItemTitle'})
|
||||||
if atag:
|
if atag:
|
||||||
ptag = item.find('div', attrs={'class':'moduleItemIntrotext'})
|
ptag = item.find(
|
||||||
url = self.INDEX + atag['href']
|
'div', attrs={'class': 'moduleItemIntrotext'})
|
||||||
title = self.tag_to_string(atag)
|
url = self.INDEX + atag['href']
|
||||||
|
title = self.tag_to_string(atag)
|
||||||
description = self.tag_to_string(ptag)
|
description = self.tag_to_string(ptag)
|
||||||
date = strftime("%a, %d %b %Y %H:%M:%S +0000",time.gmtime())
|
date = strftime(
|
||||||
|
"%a, %d %b %Y %H:%M:%S +0000", time.gmtime())
|
||||||
if url not in checker:
|
if url not in checker:
|
||||||
checker.append(url)
|
checker.append(url)
|
||||||
articles.append({
|
articles.append({
|
||||||
'title' :title
|
'title': title, 'date': date, 'url': url, 'description': description
|
||||||
,'date' :date
|
})
|
||||||
,'url' :url
|
|
||||||
,'description':description
|
|
||||||
})
|
|
||||||
totalfeeds.append((feedtitle, articles))
|
totalfeeds.append((feedtitle, articles))
|
||||||
return totalfeeds
|
return totalfeeds
|
||||||
|
@ -1,68 +1,77 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2010, jolo'
|
__copyright__ = '2010, jolo'
|
||||||
'''
|
'''
|
||||||
azrepublic.com
|
azrepublic.com
|
||||||
'''
|
'''
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1307301031(BasicNewsRecipe):
|
class AdvancedUserRecipe1307301031(BasicNewsRecipe):
|
||||||
title = u'AZRepublic'
|
title = u'AZRepublic'
|
||||||
__author__ = 'Jim Olo'
|
__author__ = 'Jim Olo'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
description = "The Arizona Republic is Arizona's leading provider of news and information, and has published a daily newspaper in Phoenix for more than 110 years"
|
description = "The Arizona Republic is Arizona's leading provider of news and information, and has published a daily newspaper in Phoenix for more than 110 years" # noqa
|
||||||
publisher = 'AZRepublic/AZCentral'
|
publisher = 'AZRepublic/AZCentral'
|
||||||
masthead_url = 'http://freedom2t.com/wp-content/uploads/press_az_republic_v2.gif'
|
masthead_url = 'http://freedom2t.com/wp-content/uploads/press_az_republic_v2.gif'
|
||||||
cover_url = 'http://www.valleyleadership.org/Common/Img/2line4c_AZRepublic%20with%20azcentral%20logo.jpg'
|
cover_url = 'http://www.valleyleadership.org/Common/Img/2line4c_AZRepublic%20with%20azcentral%20logo.jpg'
|
||||||
category = 'news, politics, USA, AZ, Arizona'
|
category = 'news, politics, USA, AZ, Arizona'
|
||||||
|
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
# extra_css = '.headline {font-size: medium;} \n .fact { padding-top: 10pt }'
|
# extra_css = '.headline {font-size: medium;} \n .fact { padding-top: 10pt }'
|
||||||
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .headline {font-size: medium} .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
|
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .headline {font-size: medium} .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} ' # noqa
|
||||||
|
|
||||||
remove_attributes = ['width','height','h2','subHeadline','style']
|
remove_attributes = ['width', 'height', 'h2', 'subHeadline', 'style']
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'id':['slidingBillboard', 'top728x90', 'subindex-header', 'topSearch']}),
|
dict(name='div', attrs={
|
||||||
dict(name='div', attrs={'id':['simplesearch', 'azcLoginBox', 'azcLoginBoxInner', 'topNav']}),
|
'id': ['slidingBillboard', 'top728x90', 'subindex-header', 'topSearch']}),
|
||||||
dict(name='div', attrs={'id':['carsDrop', 'homesDrop', 'rentalsDrop', 'classifiedDrop']}),
|
dict(name='div', attrs={
|
||||||
dict(name='div', attrs={'id':['nav', 'mp', 'subnav', 'jobsDrop']}),
|
'id': ['simplesearch', 'azcLoginBox', 'azcLoginBoxInner', 'topNav']}),
|
||||||
dict(name='h6', attrs={'class':['section-header']}),
|
dict(name='div', attrs={
|
||||||
dict(name='a', attrs={'href':['#comments']}),
|
'id': ['carsDrop', 'homesDrop', 'rentalsDrop', 'classifiedDrop']}),
|
||||||
dict(name='div', attrs={'class':['articletools clearfix', 'floatRight']}),
|
dict(name='div', attrs={'id': ['nav', 'mp', 'subnav', 'jobsDrop']}),
|
||||||
dict(name='div', attrs={'id':['fbFrame', 'ob', 'storyComments', 'storyGoogleAdBox']}),
|
dict(name='h6', attrs={'class': ['section-header']}),
|
||||||
dict(name='div', attrs={'id':['storyTopHomes', 'openRight', 'footerwrap', 'copyright']}),
|
dict(name='a', attrs={'href': ['#comments']}),
|
||||||
dict(name='div', attrs={'id':['blogsHed', 'blog_comments', 'blogByline','blogTopics']}),
|
dict(name='div', attrs={
|
||||||
dict(name='div', attrs={'id':['membersRightMain', 'dealsfooter', 'azrTopHed', 'azrRightCol']}),
|
'class': ['articletools clearfix', 'floatRight']}),
|
||||||
dict(name='div', attrs={'id':['ttdHeader', 'ttdTimeWeather']}),
|
dict(name='div', attrs={
|
||||||
dict(name='div', attrs={'id':['membersRightMain', 'deals-header-wrap']}),
|
'id': ['fbFrame', 'ob', 'storyComments', 'storyGoogleAdBox']}),
|
||||||
dict(name='div', attrs={'id':['todoTopSearchBar', 'byline clearfix', 'subdex-topnav']}),
|
dict(name='div', attrs={
|
||||||
dict(name='h1', attrs={'id':['SEOtext']}),
|
'id': ['storyTopHomes', 'openRight', 'footerwrap', 'copyright']}),
|
||||||
dict(name='table', attrs={'class':['ap-mediabox-table']}),
|
dict(name='div', attrs={
|
||||||
dict(name='p', attrs={'class':['ap_para']}),
|
'id': ['blogsHed', 'blog_comments', 'blogByline', 'blogTopics']}),
|
||||||
dict(name='span', attrs={'class':['source-org vcard', 'org fn']}),
|
dict(name='div', attrs={
|
||||||
dict(name='a', attrs={'href':['http://hosted2.ap.org/APDEFAULT/privacy']}),
|
'id': ['membersRightMain', 'dealsfooter', 'azrTopHed', 'azrRightCol']}),
|
||||||
dict(name='a', attrs={'href':['http://hosted2.ap.org/APDEFAULT/terms']}),
|
dict(name='div', attrs={'id': ['ttdHeader', 'ttdTimeWeather']}),
|
||||||
dict(name='div', attrs={'id':['onespot_nextclick']}),
|
dict(name='div', attrs={
|
||||||
]
|
'id': ['membersRightMain', 'deals-header-wrap']}),
|
||||||
|
dict(name='div', attrs={
|
||||||
feeds = [
|
'id': ['todoTopSearchBar', 'byline clearfix', 'subdex-topnav']}),
|
||||||
(u'FrontPage', u'http://www.azcentral.com/rss/feeds/republicfront.xml'),
|
dict(name='h1', attrs={'id': ['SEOtext']}),
|
||||||
(u'TopUS-News', u'http://hosted.ap.org/lineups/USHEADS.rss?SITE=AZPHG&SECTION=HOME'),
|
dict(name='table', attrs={'class': ['ap-mediabox-table']}),
|
||||||
(u'WorldNews', u'http://hosted.ap.org/lineups/WORLDHEADS.rss?SITE=AZPHG&SECTION=HOME'),
|
dict(name='p', attrs={'class': ['ap_para']}),
|
||||||
(u'TopBusiness', u'http://hosted.ap.org/lineups/BUSINESSHEADS.rss?SITE=AZPHG&SECTION=HOME'),
|
dict(name='span', attrs={'class': ['source-org vcard', 'org fn']}),
|
||||||
(u'Entertainment', u'http://hosted.ap.org/lineups/ENTERTAINMENT.rss?SITE=AZPHG&SECTION=HOME'),
|
dict(name='a', attrs={
|
||||||
(u'ArizonaNews', u'http://www.azcentral.com/rss/feeds/news.xml'),
|
'href': ['http://hosted2.ap.org/APDEFAULT/privacy']}),
|
||||||
(u'Gilbert', u'http://www.azcentral.com/rss/feeds/gilbert.xml'),
|
dict(name='a', attrs={
|
||||||
(u'Chandler', u'http://www.azcentral.com/rss/feeds/chandler.xml'),
|
'href': ['http://hosted2.ap.org/APDEFAULT/terms']}),
|
||||||
(u'DiningReviews', u'http://www.azcentral.com/rss/feeds/diningreviews.xml'),
|
dict(name='div', attrs={'id': ['onespot_nextclick']}),
|
||||||
(u'AZBusiness', u'http://www.azcentral.com/rss/feeds/business.xml'),
|
]
|
||||||
(u'ArizonaDeals', u'http://www.azcentral.com/members/Blog%7E/RealDealsblog'),
|
|
||||||
(u'GroceryDeals', u'http://www.azcentral.com/members/Blog%7E/RealDealsblog/tag/2646')
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
(u'FrontPage', u'http://www.azcentral.com/rss/feeds/republicfront.xml'),
|
||||||
|
(u'TopUS-News', u'http://hosted.ap.org/lineups/USHEADS.rss?SITE=AZPHG&SECTION=HOME'),
|
||||||
|
(u'WorldNews', u'http://hosted.ap.org/lineups/WORLDHEADS.rss?SITE=AZPHG&SECTION=HOME'),
|
||||||
|
(u'TopBusiness', u'http://hosted.ap.org/lineups/BUSINESSHEADS.rss?SITE=AZPHG&SECTION=HOME'),
|
||||||
|
(u'Entertainment', u'http://hosted.ap.org/lineups/ENTERTAINMENT.rss?SITE=AZPHG&SECTION=HOME'),
|
||||||
|
(u'ArizonaNews', u'http://www.azcentral.com/rss/feeds/news.xml'),
|
||||||
|
(u'Gilbert', u'http://www.azcentral.com/rss/feeds/gilbert.xml'),
|
||||||
|
(u'Chandler', u'http://www.azcentral.com/rss/feeds/chandler.xml'),
|
||||||
|
(u'DiningReviews', u'http://www.azcentral.com/rss/feeds/diningreviews.xml'),
|
||||||
|
(u'AZBusiness', u'http://www.azcentral.com/rss/feeds/business.xml'),
|
||||||
|
(u'ArizonaDeals', u'http://www.azcentral.com/members/Blog%7E/RealDealsblog'),
|
||||||
|
(u'GroceryDeals', u'http://www.azcentral.com/members/Blog%7E/RealDealsblog/tag/2646')
|
||||||
|
]
|
||||||
|
@ -1,42 +1,39 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class ArmyTimes(BasicNewsRecipe):
|
class ArmyTimes(BasicNewsRecipe):
|
||||||
title = 'Army Times'
|
title = 'Army Times'
|
||||||
__author__ = 'jde'
|
__author__ = 'jde'
|
||||||
__date__ = '16 May 2012'
|
__date__ = '16 May 2012'
|
||||||
__version__ = '1.0'
|
__version__ = '1.0'
|
||||||
description = 'News of the U.S. Army'
|
description = 'News of the U.S. Army'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
publisher = 'ArmyTimes.com'
|
publisher = 'ArmyTimes.com'
|
||||||
category = 'news, U.S. Army'
|
category = 'news, U.S. Army'
|
||||||
tags = 'news, U.S. Army'
|
tags = 'news, U.S. Army'
|
||||||
cover_url = 'http://www.armytimes.com/images/logo_armytimes_alert.jpg'
|
cover_url = 'http://www.armytimes.com/images/logo_armytimes_alert.jpg'
|
||||||
masthead_url = 'http://www.armytimes.com/images/logo_armytimes_alert.jpg'
|
masthead_url = 'http://www.armytimes.com/images/logo_armytimes_alert.jpg'
|
||||||
oldest_article = 7 #days
|
oldest_article = 7 # days
|
||||||
max_articles_per_feed = 25
|
max_articles_per_feed = 25
|
||||||
publication_type = 'newspaper'
|
publication_type = 'newspaper'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = None
|
encoding = None
|
||||||
recursions = 0
|
recursions = 0
|
||||||
needs_subscription = False
|
needs_subscription = False
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
auto_cleanup = True
|
auto_cleanup = True
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
|
||||||
|
('News', 'http://www.armytimes.com/rss_news.php'),
|
||||||
feeds = [
|
('Benefits', 'http://www.armytimes.com/rss_benefits.php'),
|
||||||
|
('Money', 'http://www.armytimes.com/rss_money.php'),
|
||||||
('News', 'http://www.armytimes.com/rss_news.php'),
|
('Careers & Education', 'http://www.armytimes.com/rss_careers.php'),
|
||||||
('Benefits', 'http://www.armytimes.com/rss_benefits.php'),
|
('Community', 'http://www.armytimes.com/rss_community.php'),
|
||||||
('Money', 'http://www.armytimes.com/rss_money.php'),
|
('Off Duty', 'http://www.armytimes.com/rss_off_duty.php'),
|
||||||
('Careers & Education', 'http://www.armytimes.com/rss_careers.php'),
|
('Entertainment', 'http://www.armytimes.com/rss_entertainment.php'),
|
||||||
('Community', 'http://www.armytimes.com/rss_community.php'),
|
('Guard & Reserve', 'http://www.armytimes.com/rss_guard.php'),
|
||||||
('Off Duty', 'http://www.armytimes.com/rss_off_duty.php'),
|
|
||||||
('Entertainment', 'http://www.armytimes.com/rss_entertainment.php'),
|
|
||||||
('Guard & Reserve', 'http://www.armytimes.com/rss_guard.php'),
|
|
||||||
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -7,10 +7,11 @@ __description__ = 'Get some fresh news from Arrêt sur images'
|
|||||||
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class Asi(BasicNewsRecipe):
|
class Asi(BasicNewsRecipe):
|
||||||
|
|
||||||
title = 'Arrêt sur images'
|
title = 'Arrêt sur images'
|
||||||
__author__ = 'François D. (aka franek)'
|
__author__ = 'François D. (aka franek)'
|
||||||
description = 'Global news in french from news site "Arrêt sur images"'
|
description = 'Global news in french from news site "Arrêt sur images"'
|
||||||
|
|
||||||
oldest_article = 7.0
|
oldest_article = 7.0
|
||||||
@ -26,15 +27,16 @@ class Asi(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
('vite dit et gratuit', 'http://www.arretsurimages.net/vite-dit.rss'),
|
('vite dit et gratuit', 'http://www.arretsurimages.net/vite-dit.rss'),
|
||||||
('Toutes les chroniques', 'http://www.arretsurimages.net/chroniques.rss'),
|
('Toutes les chroniques', 'http://www.arretsurimages.net/chroniques.rss'),
|
||||||
('Contenus et dossiers', 'http://www.arretsurimages.net/dossiers.rss'),
|
('Contenus et dossiers', 'http://www.arretsurimages.net/dossiers.rss'),
|
||||||
]
|
]
|
||||||
|
|
||||||
conversion_options = { 'smarten_punctuation' : True }
|
conversion_options = {'smarten_punctuation': True}
|
||||||
|
|
||||||
remove_tags = [dict(id='vite-titre'), dict(id='header'), dict(id='wrap-connexion'), dict(id='col_right'), dict(name='div', attrs={'class':'bloc-chroniqueur-2'}), dict(id='footercontainer')]
|
remove_tags = [dict(id='vite-titre'), dict(id='header'), dict(id='wrap-connexion'), dict(id='col_right'),
|
||||||
|
dict(name='div', attrs={'class': 'bloc-chroniqueur-2'}), dict(id='footercontainer')]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url.replace('contenu.php', 'contenu-imprimable.php')
|
return url.replace('contenu.php', 'contenu-imprimable.php')
|
||||||
@ -51,4 +53,3 @@ class Asi(BasicNewsRecipe):
|
|||||||
br['password'] = self.password
|
br['password'] = self.password
|
||||||
br.submit()
|
br.submit()
|
||||||
return br
|
return br
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008-2012, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
arstechnica.com
|
arstechnica.com
|
||||||
@ -7,20 +7,21 @@ arstechnica.com
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
|
|
||||||
|
|
||||||
class ArsTechnica(BasicNewsRecipe):
|
class ArsTechnica(BasicNewsRecipe):
|
||||||
title = u'Ars Technica'
|
title = u'Ars Technica'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
__author__ = 'Darko Miletic, Sujata Raman, Alexis Rohou, Tom Sparks'
|
__author__ = 'Darko Miletic, Sujata Raman, Alexis Rohou, Tom Sparks'
|
||||||
description = 'Ars Technica: Serving the technologist for 1.2 decades'
|
description = 'Ars Technica: Serving the technologist for 1.2 decades'
|
||||||
publisher = 'Conde Nast Publications'
|
publisher = 'Conde Nast Publications'
|
||||||
category = 'news, IT, technology'
|
category = 'news, IT, technology'
|
||||||
oldest_article = 5
|
oldest_article = 5
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
publication_type = 'newsportal'
|
publication_type = 'newsportal'
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
body {font-family: Arial,sans-serif}
|
body {font-family: Arial,sans-serif}
|
||||||
.heading{font-family: "Times New Roman",serif}
|
.heading{font-family: "Times New Roman",serif}
|
||||||
@ -31,56 +32,48 @@ class ArsTechnica(BasicNewsRecipe):
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comments' : description
|
'comments': description, 'tags': category, 'language': language, 'publisher': publisher
|
||||||
,'tags' : category
|
}
|
||||||
,'language' : language
|
|
||||||
,'publisher' : publisher
|
|
||||||
}
|
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(attrs={'class':'standalone'})
|
dict(attrs={'class': 'standalone'}), dict(attrs={'id': 'article-guts'})
|
||||||
,dict(attrs={'id':'article-guts'})
|
]
|
||||||
]
|
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['object','link','embed','iframe','meta'])
|
dict(name=['object', 'link', 'embed', 'iframe', 'meta']), dict(attrs={'class': 'corner-info'}), dict(attrs={
|
||||||
,dict(attrs={'class':'corner-info'})
|
'id': 'article-footer-wrap'}), dict(attrs={'class': 'article-expander'}), dict(name='nav', attrs={'class': 'subheading'})
|
||||||
,dict(attrs={'id': 'article-footer-wrap'})
|
]
|
||||||
,dict(attrs={'class': 'article-expander'})
|
|
||||||
,dict(name='nav',attrs={'class': 'subheading'})
|
|
||||||
]
|
|
||||||
remove_attributes = ['lang']
|
remove_attributes = ['lang']
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
|
(u'Ars Features (All our long-form feature articles)', u'http://feeds.arstechnica.com/arstechnica/features'),
|
||||||
(u'Ars Features (All our long-form feature articles)' , u'http://feeds.arstechnica.com/arstechnica/features')
|
(u'Technology Lab (Information Technology)', u'http://feeds.arstechnica.com/arstechnica/technology-lab'),
|
||||||
, (u'Technology Lab (Information Technology)' , u'http://feeds.arstechnica.com/arstechnica/technology-lab')
|
(u'Gear & Gadgets', u'http://feeds.arstechnica.com/arstechnica/gadgets'),
|
||||||
,(u'Gear & Gadgets' , u'http://feeds.arstechnica.com/arstechnica/gadgets')
|
(u'Ministry of Innovation (Business of Technology)', u'http://feeds.arstechnica.com/arstechnica/business'),
|
||||||
,(u'Ministry of Innovation (Business of Technology)' , u'http://feeds.arstechnica.com/arstechnica/business')
|
(u'Risk Assessment (Security & Hacktivism)', u'http://feeds.arstechnica.com/arstechnica/security'),
|
||||||
,(u'Risk Assessment (Security & Hacktivism)' , u'http://feeds.arstechnica.com/arstechnica/security')
|
(u'Law & Disorder (Civilizations & Discontents)', u'http://feeds.arstechnica.com/arstechnica/tech-policy'),
|
||||||
,(u'Law & Disorder (Civilizations & Discontents)' , u'http://feeds.arstechnica.com/arstechnica/tech-policy')
|
(u'Infinite Loop (Apple Ecosystem)', u'http://feeds.arstechnica.com/arstechnica/apple'),
|
||||||
,(u'Infinite Loop (Apple Ecosystem)' , u'http://feeds.arstechnica.com/arstechnica/apple')
|
(u'Opposable Thumbs (Gaming & Entertainment)', u'http://feeds.arstechnica.com/arstechnica/gaming'),
|
||||||
,(u'Opposable Thumbs (Gaming & Entertainment)' , u'http://feeds.arstechnica.com/arstechnica/gaming')
|
(u'Scientific Method (Science & Exploration)', u'http://feeds.arstechnica.com/arstechnica/science'),
|
||||||
,(u'Scientific Method (Science & Exploration)' , u'http://feeds.arstechnica.com/arstechnica/science')
|
(u'Multiverse (Exploratoins & Meditations on Sci-Fi)', u'http://feeds.arstechnica.com/arstechnica/multiverse'),
|
||||||
,(u'Multiverse (Exploratoins & Meditations on Sci-Fi)' , u'http://feeds.arstechnica.com/arstechnica/multiverse')
|
(u'Cars Technica (All Things Automotive)', u'http://feeds.arstechnica.com/arstechnica/cars'),
|
||||||
,(u'Cars Technica (All Things Automotive)' , u'http://feeds.arstechnica.com/arstechnica/cars')
|
(u'Staff Blogs (From the Minds of Ars)', u'http://feeds.arstechnica.com/arstechnica/staff-blogs')
|
||||||
,(u'Staff Blogs (From the Minds of Ars)' , u'http://feeds.arstechnica.com/arstechnica/staff-blogs')
|
]
|
||||||
]
|
|
||||||
|
|
||||||
def append_page(self, soup, appendtag, position):
|
def append_page(self, soup, appendtag, position):
|
||||||
pager = soup.find(attrs={'class':'numbers'})
|
pager = soup.find(attrs={'class': 'numbers'})
|
||||||
if pager:
|
if pager:
|
||||||
nexttag = pager.find(attrs={'class':'next'})
|
nexttag = pager.find(attrs={'class': 'next'})
|
||||||
if nexttag:
|
if nexttag:
|
||||||
nurl = nexttag.parent['href']
|
nurl = nexttag.parent['href']
|
||||||
rawc = self.index_to_soup(nurl,True)
|
rawc = self.index_to_soup(nurl, True)
|
||||||
soup2 = BeautifulSoup(rawc, fromEncoding=self.encoding)
|
soup2 = BeautifulSoup(rawc, fromEncoding=self.encoding)
|
||||||
texttag = soup2.find(attrs={'id':'article-guts'})
|
texttag = soup2.find(attrs={'id': 'article-guts'})
|
||||||
newpos = len(texttag.contents)
|
newpos = len(texttag.contents)
|
||||||
self.append_page(soup2,texttag,newpos)
|
self.append_page(soup2, texttag, newpos)
|
||||||
texttag.extract()
|
texttag.extract()
|
||||||
pager.extract()
|
pager.extract()
|
||||||
appendtag.insert(position,texttag)
|
appendtag.insert(position, texttag)
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
self.append_page(soup, soup.body, 3)
|
self.append_page(soup, soup.body, 3)
|
||||||
@ -102,4 +95,4 @@ class ArsTechnica(BasicNewsRecipe):
|
|||||||
return soup
|
return soup
|
||||||
|
|
||||||
def preprocess_raw_html(self, raw, url):
|
def preprocess_raw_html(self, raw, url):
|
||||||
return '<html><head>'+raw[raw.find('</head>'):]
|
return '<html><head>' + raw[raw.find('</head>'):]
|
||||||
|
@ -1,20 +1,18 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class HindustanTimes(BasicNewsRecipe):
|
class HindustanTimes(BasicNewsRecipe):
|
||||||
title = u'Asco de vida'
|
title = u'Asco de vida'
|
||||||
language = 'es'
|
language = 'es'
|
||||||
__author__ = 'Krittika Goyal'
|
__author__ = 'Krittika Goyal'
|
||||||
oldest_article = 1 #days
|
oldest_article = 1 # days
|
||||||
max_articles_per_feed = 25
|
max_articles_per_feed = 25
|
||||||
#encoding = 'cp1252'
|
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
keep_only_tags = dict(name='div', attrs={'class':'box story'})
|
keep_only_tags = dict(name='div', attrs={'class': 'box story'})
|
||||||
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
('News',
|
|
||||||
'http://feeds2.feedburner.com/AscoDeVida'),
|
|
||||||
]
|
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
('News',
|
||||||
|
'http://feeds2.feedburner.com/AscoDeVida'),
|
||||||
|
]
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Bruce <bruce at dotdoh.com>'
|
__copyright__ = '2009, Bruce <bruce at dotdoh.com>'
|
||||||
'''
|
'''
|
||||||
asiaone.com
|
asiaone.com
|
||||||
@ -8,23 +8,25 @@ asiaone.com
|
|||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class AsiaOne(BasicNewsRecipe):
|
class AsiaOne(BasicNewsRecipe):
|
||||||
title = u'AsiaOne'
|
title = u'AsiaOne'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
__author__ = 'Bruce'
|
__author__ = 'Bruce'
|
||||||
description = 'News from Singapore Press Holdings Portal'
|
description = 'News from Singapore Press Holdings Portal'
|
||||||
no_stylesheets = False
|
no_stylesheets = False
|
||||||
language = 'en_SG'
|
language = 'en_SG'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
remove_tags = [dict(name='span', attrs={'class':'footer'})]
|
remove_tags = [dict(name='span', attrs={'class': 'footer'})]
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='h1', attrs={'class':'headline'}),
|
dict(name='h1', attrs={'class': 'headline'}),
|
||||||
dict(name='div', attrs={'class':['article-content','person-info row']})
|
dict(name='div', attrs={
|
||||||
]
|
'class': ['article-content', 'person-info row']})
|
||||||
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
('Singapore', 'http://asiaone.feedsportal.com/c/34151/f/618415/index.rss'),
|
('Singapore', 'http://asiaone.feedsportal.com/c/34151/f/618415/index.rss'),
|
||||||
('Asia', 'http://asiaone.feedsportal.com/c/34151/f/618416/index.rss')
|
('Asia', 'http://asiaone.feedsportal.com/c/34151/f/618416/index.rss')
|
||||||
|
|
||||||
]
|
]
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
www.asianreviewofbooks.com
|
www.asianreviewofbooks.com
|
||||||
@ -7,20 +7,21 @@ www.asianreviewofbooks.com
|
|||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class AsianReviewOfBooks(BasicNewsRecipe):
|
class AsianReviewOfBooks(BasicNewsRecipe):
|
||||||
title = 'The Asian Review of Books'
|
title = 'The Asian Review of Books'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'In addition to reviewing books about or of relevance to Asia, the Asian Review of Books also features long-format essays by leading Asian writers and thinkers, to providing an unparalleled forum for discussion of key contemporary issues by Asians for Asia and a vehicle of intellectual depth and breadth where leading thinkers can write on the books, arts and ideas of the day. Widely quoted and referenced, with an archive of more than one thousand book reviews, it is the only web resource dedicated to Asian books. And now, with the addition of the new premium content, the Asian Review of Books, is a must-read publication.'
|
description = 'In addition to reviewing books about or of relevance to Asia, the Asian Review of Books also features long-format essays by leading Asian writers and thinkers, to providing an unparalleled forum for discussion of key contemporary issues by Asians for Asia and a vehicle of intellectual depth and breadth where leading thinkers can write on the books, arts and ideas of the day. Widely quoted and referenced, with an archive of more than one thousand book reviews, it is the only web resource dedicated to Asian books. And now, with the addition of the new premium content, the Asian Review of Books, is a must-read publication.' # noqa
|
||||||
publisher = 'The Asian Review of Books'
|
publisher = 'The Asian Review of Books'
|
||||||
category = 'literature, books, reviews, Asia'
|
category = 'literature, books, reviews, Asia'
|
||||||
oldest_article = 30
|
oldest_article = 30
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
language = 'en_CN'
|
language = 'en_CN'
|
||||||
publication_type = 'magazine'
|
publication_type = 'magazine'
|
||||||
masthead_url = 'http://www.asianreviewofbooks.com/new/images/mob_arb.png'
|
masthead_url = 'http://www.asianreviewofbooks.com/new/images/mob_arb.png'
|
||||||
extra_css = """
|
extra_css = """
|
||||||
body{font-family: serif}
|
body{font-family: serif}
|
||||||
.big {font-size: xx-large}
|
.big {font-size: xx-large}
|
||||||
@ -31,21 +32,16 @@ class AsianReviewOfBooks(BasicNewsRecipe):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||||
, 'tags' : category
|
}
|
||||||
, 'publisher' : publisher
|
|
||||||
, 'language' : language
|
|
||||||
}
|
|
||||||
|
|
||||||
|
remove_tags = [dict(name=['object', 'script', 'iframe', 'embed'])]
|
||||||
remove_tags = [dict(name=['object','script','iframe','embed'])]
|
|
||||||
remove_attributes = ['style', 'onclick']
|
remove_attributes = ['style', 'onclick']
|
||||||
feeds = [(u'Articles' , u'http://www.asianreviewofbooks.com/new/rss.php')]
|
feeds = [(u'Articles', u'http://www.asianreviewofbooks.com/new/rss.php')]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
root, sep, artid = url.rpartition('?ID=')
|
root, sep, artid = url.rpartition('?ID=')
|
||||||
return root + 'getarticle.php?articleID=' + artid + '&stats=web'
|
return root + 'getarticle.php?articleID=' + artid + '&stats=web'
|
||||||
|
|
||||||
def preprocess_raw_html(self, raw, url):
|
def preprocess_raw_html(self, raw, url):
|
||||||
return '<html><head><title>title</title></head><body>' + raw + '</body></html>'
|
return '<html><head><title>title</title></head><body>' + raw + '</body></html>'
|
||||||
|
|
||||||
|
@ -1,18 +1,19 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class AstroNEWS(BasicNewsRecipe):
|
class AstroNEWS(BasicNewsRecipe):
|
||||||
title = u'AstroNEWS'
|
title = u'AstroNEWS'
|
||||||
__author__ = 'fenuks'
|
__author__ = 'fenuks'
|
||||||
description = u'AstroNEWS regularnie dostarcza wiadomości o wydarzeniach związanych z astronomią i astronautyką. Informujemy o aktualnych odkryciach i wydarzeniach naukowych, zapowiadamy ciekawe zjawiska astronomiczne. Serwis jest częścią portalu astronomicznego AstroNET prowadzonego przez miłośników astronomii i zawodowych astronomów.'
|
description = u'AstroNEWS regularnie dostarcza wiadomości o wydarzeniach związanych z astronomią i astronautyką. Informujemy o aktualnych odkryciach i wydarzeniach naukowych, zapowiadamy ciekawe zjawiska astronomiczne. Serwis jest częścią portalu astronomicznego AstroNET prowadzonego przez miłośników astronomii i zawodowych astronomów.' # noqa
|
||||||
category = 'astronomy, science'
|
category = 'astronomy, science'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
oldest_article = 8
|
oldest_article = 8
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
#extra_css= 'table {text-align: left;}'
|
no_stylesheets = True
|
||||||
no_stylesheets=True
|
cover_url = 'http://news.astronet.pl/img/logo_news.jpg'
|
||||||
cover_url='http://news.astronet.pl/img/logo_news.jpg'
|
|
||||||
remove_attributes = ['width', 'align']
|
remove_attributes = ['width', 'align']
|
||||||
remove_tags=[dict(name='hr')]
|
remove_tags = [dict(name='hr')]
|
||||||
feeds = [(u'Wiadomości', u'http://news.astronet.pl/rss.cgi')]
|
feeds = [(u'Wiadomości', u'http://news.astronet.pl/rss.cgi')]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url.replace('astronet.pl/', 'astronet.pl/print.cgi?')
|
return url.replace('astronet.pl/', 'astronet.pl/print.cgi?')
|
||||||
|
@ -1,11 +1,12 @@
|
|||||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class Astroflesz(BasicNewsRecipe):
|
class Astroflesz(BasicNewsRecipe):
|
||||||
title = u'Astroflesz'
|
title = u'Astroflesz'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
__author__ = 'fenuks'
|
__author__ = 'fenuks'
|
||||||
description = u'astroflesz.pl - to portal poświęcony astronomii. Informuje zarówno o aktualnych wydarzeniach i odkryciach naukowych, jak również zapowiada ciekawe zjawiska astronomiczne'
|
description = u'astroflesz.pl - to portal poświęcony astronomii. Informuje zarówno o aktualnych wydarzeniach i odkryciach naukowych, jak również zapowiada ciekawe zjawiska astronomiczne' # noqa
|
||||||
category = 'astronomy'
|
category = 'astronomy'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
cover_url = 'http://www.astroflesz.pl/templates/astroflesz/images/logo/logo.png'
|
cover_url = 'http://www.astroflesz.pl/templates/astroflesz/images/logo/logo.png'
|
||||||
@ -16,12 +17,13 @@ class Astroflesz(BasicNewsRecipe):
|
|||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
remove_attributes = ['style']
|
remove_attributes = ['style']
|
||||||
keep_only_tags = [dict(id="k2Container")]
|
keep_only_tags = [dict(id="k2Container")]
|
||||||
remove_tags_after = dict(name='div', attrs={'class':'itemLinks'})
|
remove_tags_after = dict(name='div', attrs={'class': 'itemLinks'})
|
||||||
remove_tags = [dict(name='div', attrs={'class':['itemLinks', 'itemToolbar', 'itemRatingBlock']})]
|
remove_tags = [dict(name='div', attrs={
|
||||||
|
'class': ['itemLinks', 'itemToolbar', 'itemRatingBlock']})]
|
||||||
feeds = [(u'Wszystkie', u'http://astroflesz.pl/?format=feed')]
|
feeds = [(u'Wszystkie', u'http://astroflesz.pl/?format=feed')]
|
||||||
|
|
||||||
def postprocess_html(self, soup, first_fetch):
|
def postprocess_html(self, soup, first_fetch):
|
||||||
t = soup.find(attrs={'class':'itemIntroText'})
|
t = soup.find(attrs={'class': 'itemIntroText'})
|
||||||
if t:
|
if t:
|
||||||
for i in t.findAll('img'):
|
for i in t.findAll('img'):
|
||||||
i['style'] = 'float: left; margin-right: 5px;'
|
i['style'] = 'float: left; margin-right: 5px;'
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
www.athensnews.gr
|
www.athensnews.gr
|
||||||
@ -6,21 +6,22 @@ www.athensnews.gr
|
|||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class AthensNews(BasicNewsRecipe):
|
class AthensNews(BasicNewsRecipe):
|
||||||
title = 'Athens News'
|
title = 'Athens News'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Greece in English since 1952'
|
description = 'Greece in English since 1952'
|
||||||
publisher = 'NEP Publishing Company SA'
|
publisher = 'NEP Publishing Company SA'
|
||||||
category = 'news, politics, Greece, Athens'
|
category = 'news, politics, Greece, Athens'
|
||||||
oldest_article = 1
|
oldest_article = 1
|
||||||
max_articles_per_feed = 200
|
max_articles_per_feed = 200
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'utf8'
|
encoding = 'utf8'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'en_GR'
|
language = 'en_GR'
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
publication_type = 'newspaper'
|
publication_type = 'newspaper'
|
||||||
masthead_url = 'http://www.athensnews.gr/sites/athensnews/themes/athensnewsv3/images/logo.jpg'
|
masthead_url = 'http://www.athensnews.gr/sites/athensnews/themes/athensnewsv3/images/logo.jpg'
|
||||||
extra_css = """
|
extra_css = """
|
||||||
body{font-family: Arial,Helvetica,sans-serif }
|
body{font-family: Arial,Helvetica,sans-serif }
|
||||||
img{margin-bottom: 0.4em; display:block}
|
img{margin-bottom: 0.4em; display:block}
|
||||||
@ -30,36 +31,32 @@ class AthensNews(BasicNewsRecipe):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'linearize_tables': True
|
||||||
, 'tags' : category
|
}
|
||||||
, 'publisher' : publisher
|
|
||||||
, 'language' : language
|
|
||||||
, 'linearize_tables' : True
|
|
||||||
}
|
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['meta','link'])
|
dict(name=['meta', 'link'])
|
||||||
]
|
]
|
||||||
keep_only_tags=[
|
keep_only_tags = [
|
||||||
dict(name='span',attrs={'class':'big'})
|
dict(name='span', attrs={'class': 'big'}), dict(
|
||||||
,dict(name='td', attrs={'class':['articlepubdate','text']})
|
name='td', attrs={'class': ['articlepubdate', 'text']})
|
||||||
]
|
]
|
||||||
remove_attributes=['lang']
|
remove_attributes = ['lang']
|
||||||
|
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'News' , u'http://www.athensnews.gr/category/1/feed' )
|
|
||||||
,(u'Politics' , u'http://www.athensnews.gr/category/8/feed' )
|
(u'News', u'http://www.athensnews.gr/category/1/feed'),
|
||||||
,(u'Business' , u'http://www.athensnews.gr/category/2/feed' )
|
(u'Politics', u'http://www.athensnews.gr/category/8/feed'),
|
||||||
,(u'Economy' , u'http://www.athensnews.gr/category/11/feed')
|
(u'Business', u'http://www.athensnews.gr/category/2/feed'),
|
||||||
,(u'Community' , u'http://www.athensnews.gr/category/5/feed' )
|
(u'Economy', u'http://www.athensnews.gr/category/11/feed'),
|
||||||
,(u'Arts' , u'http://www.athensnews.gr/category/3/feed' )
|
(u'Community', u'http://www.athensnews.gr/category/5/feed'),
|
||||||
,(u'Living in Athens', u'http://www.athensnews.gr/category/7/feed' )
|
(u'Arts', u'http://www.athensnews.gr/category/3/feed'),
|
||||||
,(u'Sports' , u'http://www.athensnews.gr/category/4/feed' )
|
(u'Living in Athens', u'http://www.athensnews.gr/category/7/feed'),
|
||||||
,(u'Travel' , u'http://www.athensnews.gr/category/6/feed' )
|
(u'Sports', u'http://www.athensnews.gr/category/4/feed'),
|
||||||
,(u'Letters' , u'http://www.athensnews.gr/category/44/feed')
|
(u'Travel', u'http://www.athensnews.gr/category/6/feed'),
|
||||||
,(u'Media' , u'http://www.athensnews.gr/multimedia/feed' )
|
(u'Letters', u'http://www.athensnews.gr/category/44/feed'),
|
||||||
]
|
(u'Media', u'http://www.athensnews.gr/multimedia/feed')
|
||||||
|
]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url + '?action=print'
|
return url + '?action=print'
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
'''
|
'''
|
||||||
theatlantic.com
|
theatlantic.com
|
||||||
@ -9,13 +9,15 @@ import html5lib
|
|||||||
from lxml import html
|
from lxml import html
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
def classes(classes):
|
def classes(classes):
|
||||||
q = frozenset(classes.split(' '))
|
q = frozenset(classes.split(' '))
|
||||||
return dict(attrs={'class':lambda x:x and frozenset(x.split()).intersection(q)})
|
return dict(attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)})
|
||||||
|
|
||||||
|
|
||||||
class TheAtlantic(BasicNewsRecipe):
|
class TheAtlantic(BasicNewsRecipe):
|
||||||
|
|
||||||
title = 'The Atlantic'
|
title = 'The Atlantic'
|
||||||
__author__ = 'Kovid Goyal'
|
__author__ = 'Kovid Goyal'
|
||||||
description = 'Current affairs and politics focussed on the US'
|
description = 'Current affairs and politics focussed on the US'
|
||||||
INDEX = 'http://www.theatlantic.com/magazine/'
|
INDEX = 'http://www.theatlantic.com/magazine/'
|
||||||
@ -23,13 +25,14 @@ class TheAtlantic(BasicNewsRecipe):
|
|||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
classes('article-header article-body article-magazine metadata article-cover-content lead-img'),
|
classes(
|
||||||
|
'article-header article-body article-magazine metadata article-cover-content lead-img'),
|
||||||
]
|
]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
{'name': ['meta', 'link', 'noscript']},
|
{'name': ['meta', 'link', 'noscript']},
|
||||||
{'attrs':{'class':['offset-wrapper', 'ad-boxfeatures-wrapper']}},
|
{'attrs': {'class': ['offset-wrapper', 'ad-boxfeatures-wrapper']}},
|
||||||
{'attrs':{'class':lambda x: x and 'article-tools' in x}},
|
{'attrs': {'class': lambda x: x and 'article-tools' in x}},
|
||||||
{'src':lambda x:x and 'spotxchange.com' in x},
|
{'src': lambda x: x and 'spotxchange.com' in x},
|
||||||
]
|
]
|
||||||
remove_tags_after = classes('article-body')
|
remove_tags_after = classes('article-body')
|
||||||
|
|
||||||
@ -48,7 +51,7 @@ class TheAtlantic(BasicNewsRecipe):
|
|||||||
return url + '?single_page=true'
|
return url + '?single_page=true'
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
for img in soup.findAll('img', attrs={'data-src':True}):
|
for img in soup.findAll('img', attrs={'data-src': True}):
|
||||||
img['src'] = img['data-src']
|
img['src'] = img['data-src']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
@ -61,8 +64,8 @@ class TheAtlantic(BasicNewsRecipe):
|
|||||||
self.cover_url = img['src']
|
self.cover_url = img['src']
|
||||||
current_section, current_articles = 'Cover Story', []
|
current_section, current_articles = 'Cover Story', []
|
||||||
feeds = []
|
feeds = []
|
||||||
for div in soup.findAll('div', attrs={'class':lambda x: x and set(x.split()).intersection({'top-sections', 'bottom-sections'})}):
|
for div in soup.findAll('div', attrs={'class': lambda x: x and set(x.split()).intersection({'top-sections', 'bottom-sections'})}):
|
||||||
for h2 in div.findAll('h2', attrs={'class':True}):
|
for h2 in div.findAll('h2', attrs={'class': True}):
|
||||||
if 'section-name' in h2['class'].split():
|
if 'section-name' in h2['class'].split():
|
||||||
if current_articles:
|
if current_articles:
|
||||||
feeds.append((current_section, current_articles))
|
feeds.append((current_section, current_articles))
|
||||||
@ -75,18 +78,22 @@ class TheAtlantic(BasicNewsRecipe):
|
|||||||
url = a['href']
|
url = a['href']
|
||||||
if url.startswith('/'):
|
if url.startswith('/'):
|
||||||
url = 'http://www.theatlantic.com' + url
|
url = 'http://www.theatlantic.com' + url
|
||||||
li = a.findParent('li', attrs={'class':lambda x: x and 'article' in x.split()})
|
li = a.findParent(
|
||||||
|
'li', attrs={'class': lambda x: x and 'article' in x.split()})
|
||||||
desc = ''
|
desc = ''
|
||||||
dek = li.find(attrs={'class':lambda x:x and 'dek' in x.split()})
|
dek = li.find(
|
||||||
|
attrs={'class': lambda x: x and 'dek' in x.split()})
|
||||||
if dek is not None:
|
if dek is not None:
|
||||||
desc += self.tag_to_string(dek)
|
desc += self.tag_to_string(dek)
|
||||||
byline = li.find(attrs={'class':lambda x:x and 'byline' in x.split()})
|
byline = li.find(
|
||||||
|
attrs={'class': lambda x: x and 'byline' in x.split()})
|
||||||
if byline is not None:
|
if byline is not None:
|
||||||
desc += ' -- ' + self.tag_to_string(byline)
|
desc += ' -- ' + self.tag_to_string(byline)
|
||||||
self.log('\t', title, 'at', url)
|
self.log('\t', title, 'at', url)
|
||||||
if desc:
|
if desc:
|
||||||
self.log('\t\t', desc)
|
self.log('\t\t', desc)
|
||||||
current_articles.append({'title':title, 'url':url, 'description':desc})
|
current_articles.append(
|
||||||
|
{'title': title, 'url': url, 'description': desc})
|
||||||
if current_articles:
|
if current_articles:
|
||||||
feeds.append((current_section, current_articles))
|
feeds.append((current_section, current_articles))
|
||||||
return feeds
|
return feeds
|
||||||
|
@ -3,20 +3,21 @@
|
|||||||
from __future__ import unicode_literals, division, absolute_import, print_function
|
from __future__ import unicode_literals, division, absolute_import, print_function
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1421956712(BasicNewsRecipe):
|
class AdvancedUserRecipe1421956712(BasicNewsRecipe):
|
||||||
title = 'TheAtlantic.com'
|
title = 'TheAtlantic.com'
|
||||||
__author__ = 'ebrandon'
|
__author__ = 'ebrandon'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
description = 'News and editorial about politics, culture, entertainment, tech, etc. Contains many articles not seen in The Atlantic magazine'
|
description = 'News and editorial about politics, culture, entertainment, tech, etc. Contains many articles not seen in The Atlantic magazine'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
auto_cleanup = True
|
auto_cleanup = True
|
||||||
ignore_duplicate_articles = {'title', 'url'}
|
ignore_duplicate_articles = {'title', 'url'}
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url.replace('/archive/', '/print/')
|
return url.replace('/archive/', '/print/')
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
('Politics', 'http://feeds.feedburner.com/AtlanticPoliticsChannel'),
|
('Politics', 'http://feeds.feedburner.com/AtlanticPoliticsChannel'),
|
||||||
('International', 'http://feeds.feedburner.com/AtlanticInternational'),
|
('International', 'http://feeds.feedburner.com/AtlanticInternational'),
|
||||||
('National', 'http://feeds.feedburner.com/AtlanticNational'),
|
('National', 'http://feeds.feedburner.com/AtlanticNational'),
|
||||||
|
@ -2,14 +2,15 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class AttacEspanaRecipe (BasicNewsRecipe):
|
class AttacEspanaRecipe (BasicNewsRecipe):
|
||||||
__author__ = 'Marc Busqué <marc@lamarciana.com>'
|
__author__ = 'Marc Busqué <marc@lamarciana.com>'
|
||||||
__url__ = 'http://www.lamarciana.com'
|
__url__ = 'http://www.lamarciana.com'
|
||||||
__version__ = '1.0.2'
|
__version__ = '1.0.2'
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2012, Marc Busqué <marc@lamarciana.com>'
|
__copyright__ = '2012, Marc Busqué <marc@lamarciana.com>'
|
||||||
title = u'attac.es'
|
title = u'attac.es'
|
||||||
description = u'La Asociación por la Tasación de las Transacciones Financieras y por la Ayuda a los Ciudadanos (ATTAC) es un movimiento internacional altermundialista que promueve el control democrático de los mercados financieros y las instituciones encargadas de su control mediante la reflexión política y la movilización social.'
|
description = u'La Asociación por la Tasación de las Transacciones Financieras y por la Ayuda a los Ciudadanos (ATTAC) es un movimiento internacional altermundialista que promueve el control democrático de los mercados financieros y las instituciones encargadas de su control mediante la reflexión política y la movilización social.' # noqa
|
||||||
url = 'http://www.attac.es'
|
url = 'http://www.attac.es'
|
||||||
language = 'es'
|
language = 'es'
|
||||||
tags = 'contrainformación, información alternativa'
|
tags = 'contrainformación, información alternativa'
|
||||||
@ -27,5 +28,5 @@ class AttacEspanaRecipe (BasicNewsRecipe):
|
|||||||
cover_url = u'http://www.attac.es/wp-content/themes/attacweb/images/attaces.jpg'
|
cover_url = u'http://www.attac.es/wp-content/themes/attacweb/images/attaces.jpg'
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Attac', u'http://www.attac.es/feed'),
|
(u'Attac', u'http://www.attac.es/feed'),
|
||||||
]
|
]
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__author__ = 'GabrieleMarini, based on Darko Miletic'
|
__author__ = 'GabrieleMarini, based on Darko Miletic'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>, Gabriele Marini'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>, Gabriele Marini'
|
||||||
__version__ = 'v1.02 Marini Gabriele '
|
__version__ = 'v1.02 Marini Gabriele '
|
||||||
__date__ = '14062010'
|
__date__ = '14062010'
|
||||||
__description__ = 'Italian daily newspaper'
|
__description__ = 'Italian daily newspaper'
|
||||||
|
|
||||||
'''
|
'''
|
||||||
@ -11,53 +11,46 @@ http://www.corrieredellosport.it/
|
|||||||
'''
|
'''
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class Auto(BasicNewsRecipe):
|
class Auto(BasicNewsRecipe):
|
||||||
__author__ = 'Gabriele Marini'
|
__author__ = 'Gabriele Marini'
|
||||||
description = 'Auto and Formula 1'
|
description = 'Auto and Formula 1'
|
||||||
|
|
||||||
cover_url = 'http://www.auto.it/res/imgs/logo_Auto.png'
|
cover_url = 'http://www.auto.it/res/imgs/logo_Auto.png'
|
||||||
|
|
||||||
|
title = u'Auto'
|
||||||
|
publisher = 'CONTE Editore'
|
||||||
|
category = 'Sport'
|
||||||
|
|
||||||
title = u'Auto'
|
language = 'it'
|
||||||
publisher = 'CONTE Editore'
|
timefmt = '[%a, %d %b, %Y]'
|
||||||
category = 'Sport'
|
|
||||||
|
|
||||||
language = 'it'
|
|
||||||
timefmt = '[%a, %d %b, %Y]'
|
|
||||||
|
|
||||||
oldest_article = 60
|
oldest_article = 60
|
||||||
max_articles_per_feed = 30
|
max_articles_per_feed = 30
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
recursion = 10
|
recursion = 10
|
||||||
|
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description, '--category', category, '--publisher', publisher, '--ignore-tables'
|
||||||
, '--category', category
|
]
|
||||||
, '--publisher', publisher
|
|
||||||
, '--ignore-tables'
|
|
||||||
]
|
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + \
|
||||||
|
description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='h2', attrs={'class':['tit_Article y_Txt']}),
|
dict(name='h2', attrs={'class': ['tit_Article y_Txt']}),
|
||||||
dict(name='h2', attrs={'class':['tit_Article']}),
|
dict(name='h2', attrs={'class': ['tit_Article']}),
|
||||||
dict(name='div', attrs={'class':['box_Img newsdet_new ']}),
|
dict(name='div', attrs={'class': ['box_Img newsdet_new ']}),
|
||||||
dict(name='div', attrs={'class':['box_Img newsdet_as ']}),
|
dict(name='div', attrs={'class': ['box_Img newsdet_as ']}),
|
||||||
dict(name='table', attrs={'class':['table_A']}),
|
dict(name='table', attrs={'class': ['table_A']}),
|
||||||
dict(name='div', attrs={'class':['txt_Article txtBox_cms']}),
|
dict(name='div', attrs={'class': ['txt_Article txtBox_cms']}),
|
||||||
dict(name='testoscheda')]
|
dict(name='testoscheda')]
|
||||||
|
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Tutte le News' , u'http://www.auto.it/rss/articoli.xml' ),
|
(u'Tutte le News', u'http://www.auto.it/rss/articoli.xml'),
|
||||||
(u'Prove su Strada' , u'http://www.auto.it/rss/prove+6.xml'),
|
(u'Prove su Strada', u'http://www.auto.it/rss/prove+6.xml'),
|
||||||
(u'Novit\xe0' , u'http://www.auto.it/rss/novita+3.xml')
|
(u'Novit\xe0', u'http://www.auto.it/rss/novita+3.xml')
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,16 +1,15 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class AutoBlog(BasicNewsRecipe):
|
class AutoBlog(BasicNewsRecipe):
|
||||||
title = u'Auto Blog'
|
title = u'Auto Blog'
|
||||||
__author__ = 'Welovelucy'
|
__author__ = 'Welovelucy'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
description = 'Auto industry news'
|
description = 'Auto industry news'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
|
|
||||||
feeds = [(u'AutoBlog', u'http://www.autoblog.com/rss.xml')]
|
feeds = [(u'AutoBlog', u'http://www.autoblog.com/rss.xml')]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url + 'print/'
|
return url + 'print/'
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__author__ = 'GabrieleMarini, based on Darko Miletic'
|
__author__ = 'GabrieleMarini, based on Darko Miletic'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>, Gabriele Marini'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>, Gabriele Marini'
|
||||||
__version__ = 'v1.02 Marini Gabriele '
|
__version__ = 'v1.02 Marini Gabriele '
|
||||||
__date__ = '10, January 2010'
|
__date__ = '10, January 2010'
|
||||||
__description__ = 'Italian daily newspaper'
|
__description__ = 'Italian daily newspaper'
|
||||||
|
|
||||||
'''
|
'''
|
||||||
@ -11,80 +11,80 @@ http://www.corrieredellosport.it/
|
|||||||
'''
|
'''
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class AutoPR(BasicNewsRecipe):
|
class AutoPR(BasicNewsRecipe):
|
||||||
__author__ = 'Gabriele Marini'
|
__author__ = 'Gabriele Marini'
|
||||||
description = 'Auto and Formula 1'
|
description = 'Auto and Formula 1'
|
||||||
|
|
||||||
cover_url = 'http://www.auto.it/res/imgs/logo_Auto.png'
|
cover_url = 'http://www.auto.it/res/imgs/logo_Auto.png'
|
||||||
|
|
||||||
|
title = u'Auto Prove'
|
||||||
|
publisher = 'CONTE Editore'
|
||||||
|
category = 'Sport'
|
||||||
|
|
||||||
title = u'Auto Prove'
|
language = 'it'
|
||||||
publisher = 'CONTE Editore'
|
timefmt = '[%a, %d %b, %Y]'
|
||||||
category = 'Sport'
|
|
||||||
|
|
||||||
language = 'it'
|
|
||||||
timefmt = '[%a, %d %b, %Y]'
|
|
||||||
|
|
||||||
oldest_article = 60
|
oldest_article = 60
|
||||||
max_articles_per_feed = 20
|
max_articles_per_feed = 20
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
recursion = 100
|
recursion = 100
|
||||||
|
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
|
||||||
#html2lrf_options = [
|
# html2lrf_options = [
|
||||||
# '--comment', description
|
# '--comment', description
|
||||||
# , '--category', category
|
# , '--category', category
|
||||||
# , '--publisher', publisher
|
# , '--publisher', publisher
|
||||||
# , '--ignore-tables'
|
# , '--ignore-tables'
|
||||||
# ]
|
# ]
|
||||||
|
|
||||||
#html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='h2', attrs={'class':['tit_Article y_Txt']}),
|
dict(name='h2', attrs={'class': ['tit_Article y_Txt']}),
|
||||||
dict(name='h2', attrs={'class':['tit_Article']}),
|
dict(name='h2', attrs={'class': ['tit_Article']}),
|
||||||
dict(name='div', attrs={'class':['box_Img newsdet_new ']}),
|
dict(name='div', attrs={'class': ['box_Img newsdet_new ']}),
|
||||||
dict(name='div', attrs={'class':['box_Img newsdet_as ']}),
|
dict(name='div', attrs={'class': ['box_Img newsdet_as ']}),
|
||||||
dict(name='table', attrs={'class':['table_A']}),
|
dict(name='table', attrs={'class': ['table_A']}),
|
||||||
dict(name='div', attrs={'class':['txt_Article txtBox_cms']}),
|
dict(name='div', attrs={'class': ['txt_Article txtBox_cms']}),
|
||||||
dict(name='testoscheda')]
|
dict(name='testoscheda')]
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
feeds = []
|
feeds = []
|
||||||
for title, url in [
|
for title, url in [
|
||||||
("Prove su Strada" , "http://www.auto.it/rss/prove+6.xml")
|
("Prove su Strada", "http://www.auto.it/rss/prove+6.xml")
|
||||||
]:
|
]:
|
||||||
soup = self.index_to_soup(url)
|
soup = self.index_to_soup(url)
|
||||||
soup = soup.find('channel')
|
soup = soup.find('channel')
|
||||||
print soup
|
print soup
|
||||||
|
|
||||||
for article in soup.findAllNext('item'):
|
for article in soup.findAllNext('item'):
|
||||||
title = self.tag_to_string(article.title)
|
title = self.tag_to_string(article.title)
|
||||||
date = self.tag_to_string(article.pubDate)
|
date = self.tag_to_string(article.pubDate)
|
||||||
description = self.tag_to_string(article.description)
|
description = self.tag_to_string(article.description)
|
||||||
link = self.tag_to_string(article.guid)
|
link = self.tag_to_string(article.guid)
|
||||||
# print article
|
# print article
|
||||||
articles = self.create_links_append(link, date, description)
|
articles = self.create_links_append(link, date, description)
|
||||||
if articles:
|
if articles:
|
||||||
feeds.append((title, articles))
|
feeds.append((title, articles))
|
||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
def create_links_append(self, link, date, description):
|
def create_links_append(self, link, date, description):
|
||||||
current_articles = []
|
current_articles = []
|
||||||
|
|
||||||
current_articles.append({'title': 'Generale', 'url': link,'description':description, 'date':date}),
|
current_articles.append(
|
||||||
current_articles.append({'title': 'Design', 'url': link.replace('scheda','design'),'description':'scheda', 'date':''}),
|
{'title': 'Generale', 'url': link, 'description': description, 'date': date}),
|
||||||
current_articles.append({'title': 'Interni', 'url': link.replace('scheda','interni'),'description':'Interni', 'date':''}),
|
current_articles.append({'title': 'Design', 'url': link.replace(
|
||||||
current_articles.append({'title': 'Tecnica', 'url': link.replace('scheda','tecnica'),'description':'Tecnica', 'date':''}),
|
'scheda', 'design'), 'description': 'scheda', 'date': ''}),
|
||||||
current_articles.append({'title': 'Su Strada', 'url': link.replace('scheda','su_strada'),'description':'Su Strada', 'date':''}),
|
current_articles.append({'title': 'Interni', 'url': link.replace(
|
||||||
current_articles.append({'title': 'Pagella', 'url': link.replace('scheda','pagella'),'description':'Pagella', 'date':''}),
|
'scheda', 'interni'), 'description': 'Interni', 'date': ''}),
|
||||||
current_articles.append({'title': 'Rilevamenti', 'url': link.replace('scheda','telemetria'),'description':'Rilevamenti', 'date':''})
|
current_articles.append({'title': 'Tecnica', 'url': link.replace(
|
||||||
|
'scheda', 'tecnica'), 'description': 'Tecnica', 'date': ''}),
|
||||||
|
current_articles.append({'title': 'Su Strada', 'url': link.replace(
|
||||||
|
'scheda', 'su_strada'), 'description': 'Su Strada', 'date': ''}),
|
||||||
|
current_articles.append({'title': 'Pagella', 'url': link.replace(
|
||||||
|
'scheda', 'pagella'), 'description': 'Pagella', 'date': ''}),
|
||||||
|
current_articles.append({'title': 'Rilevamenti', 'url': link.replace(
|
||||||
|
'scheda', 'telemetria'), 'description': 'Rilevamenti', 'date': ''})
|
||||||
|
|
||||||
return current_articles
|
return current_articles
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||||
'''
|
'''
|
||||||
auto-bild.ro
|
auto-bild.ro
|
||||||
@ -9,47 +9,42 @@ auto-bild.ro
|
|||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class AutoBild(BasicNewsRecipe):
|
class AutoBild(BasicNewsRecipe):
|
||||||
title = u'Auto Bild'
|
title = u'Auto Bild'
|
||||||
__author__ = u'Silviu Cotoar\u0103'
|
__author__ = u'Silviu Cotoar\u0103'
|
||||||
description = 'Auto'
|
description = 'Auto'
|
||||||
publisher = 'Auto Bild'
|
publisher = 'Auto Bild'
|
||||||
oldest_article = 50
|
oldest_article = 50
|
||||||
language = 'ro'
|
language = 'ro'
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
category = 'Ziare,Reviste,Auto'
|
category = 'Ziare,Reviste,Auto'
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
cover_url = 'http://www.auto-bild.ro/images/autobild.gif'
|
cover_url = 'http://www.auto-bild.ro/images/autobild.gif'
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comments' : description
|
'comments': description, 'tags': category, 'language': language, 'publisher': publisher
|
||||||
,'tags' : category
|
}
|
||||||
,'language' : language
|
|
||||||
,'publisher' : publisher
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'class':'box_2 articol clearfix'})
|
dict(name='div', attrs={'class': 'box_2 articol clearfix'})
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'class':['detail']})
|
dict(name='div', attrs={'class': ['detail']}), dict(name='a', attrs={'id': ['zoom_link']}), dict(
|
||||||
, dict(name='a', attrs={'id':['zoom_link']})
|
name='div', attrs={'class': ['icons clearfix']}), dict(name='div', attrs={'class': ['pub_articol clearfix']})
|
||||||
, dict(name='div', attrs={'class':['icons clearfix']})
|
|
||||||
, dict(name='div', attrs={'class':['pub_articol clearfix']})
|
|
||||||
|
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags_after = [
|
remove_tags_after = [
|
||||||
dict(name='div', attrs={'class':['pub_articol clearfix']})
|
dict(name='div', attrs={'class': ['pub_articol clearfix']})
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Feeds', u'http://www.auto-bild.ro/rss/toate')
|
(u'Feeds', u'http://www.auto-bild.ro/rss/toate')
|
||||||
]
|
]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
return self.adeify_images(soup)
|
return self.adeify_images(soup)
|
||||||
|
@ -1,27 +1,28 @@
|
|||||||
import re
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class autogids(BasicNewsRecipe):
|
class autogids(BasicNewsRecipe):
|
||||||
title = u'Automatiseringgids IT'
|
title = u'Automatiseringgids IT'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
__author__ = 'DrMerry'
|
__author__ = 'DrMerry'
|
||||||
description = 'IT-nieuws van Automatiseringgids'
|
description = 'IT-nieuws van Automatiseringgids'
|
||||||
language = 'nl'
|
language = 'nl'
|
||||||
publisher = 'AutomatiseringGids'
|
publisher = 'AutomatiseringGids'
|
||||||
category = 'Nieuws, IT, Nederlandstalig'
|
category = 'Nieuws, IT, Nederlandstalig'
|
||||||
simultaneous_downloads = 5
|
simultaneous_downloads = 5
|
||||||
timefmt = ' [%a, %d %B, %Y]'
|
timefmt = ' [%a, %d %B, %Y]'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
publication_type = 'newspaper'
|
publication_type = 'newspaper'
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
cover_url = 'http://www.automatiseringgids.nl/binaries/content/gallery/ag/marketing/ag-avatar-100x50.jpg'
|
cover_url = 'http://www.automatiseringgids.nl/binaries/content/gallery/ag/marketing/ag-avatar-100x50.jpg'
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':['content']})]
|
keep_only_tags = [dict(name='div', attrs={'class': ['content']})]
|
||||||
|
|
||||||
preprocess_regexps = [
|
preprocess_regexps = [
|
||||||
(re.compile(r'(<h3>Reacties</h3>|<h2>Zie ook:</h2>|<div style=".*</div>|<a[^>]*>|</a>)', re.DOTALL|re.IGNORECASE),
|
(re.compile(r'(<h3>Reacties</h3>|<h2>Zie ook:</h2>|<div style=".*</div>|<a[^>]*>|</a>)', re.DOTALL | re.IGNORECASE),
|
||||||
lambda match: ''),
|
lambda match: ''),
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [(u'Actueel', u'http://www.automatiseringgids.nl/rss.aspx')]
|
feeds = [(u'Actueel', u'http://www.automatiseringgids.nl/rss.aspx')]
|
||||||
|
@ -9,22 +9,25 @@ www.autosport.com
|
|||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class autosport(BasicNewsRecipe):
|
class autosport(BasicNewsRecipe):
|
||||||
title = u'Autosport'
|
title = u'Autosport'
|
||||||
__author__ = 'MrStefan <mrstefaan@gmail.com>'
|
__author__ = 'MrStefan <mrstefaan@gmail.com>'
|
||||||
language = 'en_GB'
|
language = 'en_GB'
|
||||||
description =u'Daily Formula 1 and motorsport news from the leading weekly motor racing magazine. The authority on Formula 1, F1, MotoGP, GP2, Champ Car, Le Mans...'
|
description = u'Daily Formula 1 and motorsport news from the leading weekly motor racing magazine. The authority on Formula 1, F1, MotoGP, GP2, Champ Car, Le Mans...' # noqa
|
||||||
masthead_url='http://cdn.images.autosport.com/asdotcom.gif'
|
masthead_url = 'http://cdn.images.autosport.com/asdotcom.gif'
|
||||||
remove_empty_feeds= True
|
remove_empty_feeds = True
|
||||||
oldest_article = 1
|
oldest_article = 1
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
remove_javascript=True
|
remove_javascript = True
|
||||||
no_stylesheets=True
|
no_stylesheets = True
|
||||||
|
|
||||||
keep_only_tags =[]
|
keep_only_tags = []
|
||||||
keep_only_tags.append(dict(name = 'h1', attrs = {'class' : 'news_headline'}))
|
keep_only_tags.append(dict(name='h1', attrs={'class': 'news_headline'}))
|
||||||
keep_only_tags.append(dict(name = 'td', attrs = {'class' : 'news_article_author'}))
|
keep_only_tags.append(
|
||||||
keep_only_tags.append(dict(name = 'td', attrs = {'class' : 'news_article_date'}))
|
dict(name='td', attrs={'class': 'news_article_author'}))
|
||||||
keep_only_tags.append(dict(name = 'p'))
|
keep_only_tags.append(
|
||||||
|
dict(name='td', attrs={'class': 'news_article_date'}))
|
||||||
|
keep_only_tags.append(dict(name='p'))
|
||||||
|
|
||||||
feeds = [(u'ALL NEWS', u'http://www.autosport.com/rss/allnews.xml')]
|
feeds = [(u'ALL NEWS', u'http://www.autosport.com/rss/allnews.xml')]
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||||
'''
|
'''
|
||||||
avantaje.ro
|
avantaje.ro
|
||||||
@ -9,49 +9,41 @@ avantaje.ro
|
|||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class Avantaje(BasicNewsRecipe):
|
class Avantaje(BasicNewsRecipe):
|
||||||
title = u'Avantaje'
|
title = u'Avantaje'
|
||||||
__author__ = u'Silviu Cotoar\u0103'
|
__author__ = u'Silviu Cotoar\u0103'
|
||||||
description = u''
|
description = u''
|
||||||
publisher = u'Avantaje'
|
publisher = u'Avantaje'
|
||||||
oldest_article = 25
|
oldest_article = 25
|
||||||
language = 'ro'
|
language = 'ro'
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
category = 'Ziare,Reviste,Stiri'
|
category = 'Ziare,Reviste,Stiri'
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
cover_url = 'http://www.avantaje.ro/images/default/logo.gif'
|
cover_url = 'http://www.avantaje.ro/images/default/logo.gif'
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comments' : description
|
'comments': description, 'tags': category, 'language': language, 'publisher': publisher
|
||||||
,'tags' : category
|
}
|
||||||
,'language' : language
|
|
||||||
,'publisher' : publisher
|
|
||||||
}
|
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'id':'articol'})
|
dict(name='div', attrs={'id': 'articol'}), dict(name='div', attrs={
|
||||||
, dict(name='div', attrs={'class':'gallery clearfix'})
|
'class': 'gallery clearfix'}), dict(name='div', attrs={'align': 'justify'})
|
||||||
, dict(name='div', attrs={'align':'justify'})
|
]
|
||||||
]
|
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'id':['color_sanatate_box']})
|
dict(name='div', attrs={'id': ['color_sanatate_box']}), dict(name='div', attrs={'class': ['nav']}), dict(name='div', attrs={'class': ['voteaza_art']}), dict(name='div', attrs={'class': ['bookmark']}), dict(name='div', attrs={'class': ['links clearfix']}), dict(name='div', attrs={'class': ['title']}) # noqa
|
||||||
, dict(name='div', attrs={'class':['nav']})
|
]
|
||||||
, dict(name='div', attrs={'class':['voteaza_art']})
|
|
||||||
, dict(name='div', attrs={'class':['bookmark']})
|
|
||||||
, dict(name='div', attrs={'class':['links clearfix']})
|
|
||||||
, dict(name='div', attrs={'class':['title']})
|
|
||||||
]
|
|
||||||
|
|
||||||
remove_tags_after = [
|
remove_tags_after = [
|
||||||
dict(name='div', attrs={'class':['title']})
|
dict(name='div', attrs={'class': ['title']})
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Feeds', u'http://feeds.feedburner.com/Avantaje')
|
(u'Feeds', u'http://feeds.feedburner.com/Avantaje')
|
||||||
]
|
]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
return self.adeify_images(soup)
|
return self.adeify_images(soup)
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||||
'''
|
'''
|
||||||
aventurilapescuit.ro
|
aventurilapescuit.ro
|
||||||
@ -9,43 +9,41 @@ aventurilapescuit.ro
|
|||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class AventuriLaPescuit(BasicNewsRecipe):
|
class AventuriLaPescuit(BasicNewsRecipe):
|
||||||
title = u'Aventuri La Pescuit'
|
title = u'Aventuri La Pescuit'
|
||||||
__author__ = u'Silviu Cotoar\u0103'
|
__author__ = u'Silviu Cotoar\u0103'
|
||||||
description = 'Aventuri La Pescuit'
|
description = 'Aventuri La Pescuit'
|
||||||
publisher = 'Aventuri La Pescuit'
|
publisher = 'Aventuri La Pescuit'
|
||||||
oldest_article = 5
|
oldest_article = 5
|
||||||
language = 'ro'
|
language = 'ro'
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
category = 'Ziare,Pescuit,Hobby'
|
category = 'Ziare,Pescuit,Hobby'
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
cover_url = 'http://www.aventurilapescuit.ro/images/logo.gif'
|
cover_url = 'http://www.aventurilapescuit.ro/images/logo.gif'
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comments' : description
|
'comments': description, 'tags': category, 'language': language, 'publisher': publisher
|
||||||
,'tags' : category
|
}
|
||||||
,'language' : language
|
|
||||||
,'publisher' : publisher
|
|
||||||
}
|
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'id':'Article'})
|
dict(name='div', attrs={'id': 'Article'})
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'class':['right option']})
|
dict(name='div', attrs={'class': ['right option']}), dict(
|
||||||
, dict(name='iframe', attrs={'scrolling':['no']})
|
name='iframe', attrs={'scrolling': ['no']})
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags_after = [
|
remove_tags_after = [
|
||||||
dict(name='iframe', attrs={'scrolling':['no']})
|
dict(name='iframe', attrs={'scrolling': ['no']})
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Feeds', u'http://www.aventurilapescuit.ro/sections/rssread/1')
|
(u'Feeds', u'http://www.aventurilapescuit.ro/sections/rssread/1')
|
||||||
]
|
]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
return self.adeify_images(soup)
|
return self.adeify_images(soup)
|
||||||
|
@ -4,44 +4,44 @@ __copyright__ = '2010, BlonG'
|
|||||||
avto-magazin.si
|
avto-magazin.si
|
||||||
'''
|
'''
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class Dnevnik(BasicNewsRecipe):
|
class Dnevnik(BasicNewsRecipe):
|
||||||
title = u'Avto Magazin'
|
title = u'Avto Magazin'
|
||||||
__author__ = u'BlonG'
|
__author__ = u'BlonG'
|
||||||
description = u'Za avtomobilisti\xc4\x8dne frike, poznavalce in nedeljske \xc5\xa1oferje.'
|
description = u'Za avtomobilisti\xc4\x8dne frike, poznavalce in nedeljske \xc5\xa1oferje.'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 20
|
max_articles_per_feed = 20
|
||||||
labguage = 'sl'
|
labguage = 'sl'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'sl'
|
language = 'sl'
|
||||||
|
|
||||||
conversion_options = {'linearize_tables' : True}
|
conversion_options = {'linearize_tables': True}
|
||||||
|
|
||||||
|
cover_url = 'https://sites.google.com/site/javno2010/home/avto_magazin_cover.jpg'
|
||||||
|
|
||||||
cover_url = 'https://sites.google.com/site/javno2010/home/avto_magazin_cover.jpg'
|
extra_css = '''
|
||||||
|
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
|
||||||
|
h2{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
|
||||||
|
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
||||||
|
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
|
||||||
|
'''
|
||||||
|
|
||||||
extra_css = '''
|
keep_only_tags = [
|
||||||
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
|
dict(name='div', attrs={'id': '_iprom_inStream'}),
|
||||||
h2{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
|
# dict(name='div', attrs={'class':'entry-content'}),
|
||||||
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
]
|
||||||
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
|
|
||||||
'''
|
|
||||||
|
|
||||||
keep_only_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'id':'_iprom_inStream'}),
|
dict(name='div', attrs={'id': 'voteConfirmation'}),
|
||||||
# dict(name='div', attrs={'class':'entry-content'}),
|
dict(name='div', attrs={'id': 'InsideVote'}),
|
||||||
]
|
dict(name='div', attrs={'class': 'Zone234'}),
|
||||||
|
dict(name='div', attrs={'class': 'Comments'}),
|
||||||
|
dict(name='div', attrs={'class': 'sorodneNovice'}),
|
||||||
|
dict(name='div', attrs={'id': 'footer'}),
|
||||||
|
]
|
||||||
|
|
||||||
remove_tags = [
|
feeds = [
|
||||||
dict(name='div', attrs={'id':'voteConfirmation'}),
|
(u'Novice', u'http://www.avto-magazin.si/rss/')
|
||||||
dict(name='div', attrs={'id':'InsideVote'}),
|
]
|
||||||
dict(name='div', attrs={'class':'Zone234'}),
|
|
||||||
dict(name='div', attrs={'class':'Comments'}),
|
|
||||||
dict(name='div', attrs={'class':'sorodneNovice'}),
|
|
||||||
dict(name='div', attrs={'id':'footer'}),
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
(u'Novice', u'http://www.avto-magazin.si/rss/')
|
|
||||||
]
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
axxon.com.ar
|
axxon.com.ar
|
||||||
@ -6,35 +6,33 @@ axxon.com.ar
|
|||||||
from calibre import strftime
|
from calibre import strftime
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class Axxon_news(BasicNewsRecipe):
|
class Axxon_news(BasicNewsRecipe):
|
||||||
title = 'Revista Axxon'
|
title = 'Revista Axxon'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Axxon, Ciencia Ficcion en Bits'
|
description = 'Axxon, Ciencia Ficcion en Bits'
|
||||||
publisher = 'Revista Axxon - Ciencia Ficcion'
|
publisher = 'Revista Axxon - Ciencia Ficcion'
|
||||||
category = 'SF, Argentina'
|
category = 'SF, Argentina'
|
||||||
oldest_article = 31
|
oldest_article = 31
|
||||||
delay = 1
|
delay = 1
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = False
|
no_stylesheets = False
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'es_AR'
|
language = 'es_AR'
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
publication_type = 'magazine'
|
publication_type = 'magazine'
|
||||||
INDEX = 'http://axxon.com.ar/rev/'
|
INDEX = 'http://axxon.com.ar/rev/'
|
||||||
extra_css = ' body{font-family: Verdana,Arial,sans-serif} .editorial{font-family: serif} .posttitle{font-family: "Trebuchet MS","Lucida Grande",Verdana,Arial,sans-serif} .cuento{font-family: "Times New Roman", serif} .biografia{color: red; font-weight: bold; font-family: Verdana,Geneva,Arial,Helvetica,sans-serif} '
|
extra_css = ' body{font-family: Verdana,Arial,sans-serif} .editorial{font-family: serif} .posttitle{font-family: "Trebuchet MS","Lucida Grande",Verdana,Arial,sans-serif} .cuento{font-family: "Times New Roman", serif} .biografia{color: red; font-weight: bold; font-family: Verdana,Geneva,Arial,Helvetica,sans-serif} ' # noqa
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||||
, 'tags' : category
|
}
|
||||||
, 'publisher' : publisher
|
|
||||||
, 'language' : language
|
|
||||||
}
|
|
||||||
|
|
||||||
|
keep_only_tags = [dict(name='div', attrs={'class': 'post'})]
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':'post'})]
|
remove_tags = [dict(name=['object', 'link', 'iframe', 'embed', 'img'])]
|
||||||
remove_tags = [dict(name=['object','link','iframe','embed','img'])]
|
remove_tags_after = [
|
||||||
remove_tags_after = [dict(attrs={'class':['editorial','correo','biografia','articulo']})]
|
dict(attrs={'class': ['editorial', 'correo', 'biografia', 'articulo']})]
|
||||||
remove_attributes = ['width','height','font','border','align']
|
remove_attributes = ['width', 'height', 'font', 'border', 'align']
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
articles = []
|
articles = []
|
||||||
@ -44,21 +42,16 @@ class Axxon_news(BasicNewsRecipe):
|
|||||||
description = ''
|
description = ''
|
||||||
title_prefix = ''
|
title_prefix = ''
|
||||||
feed_link = item.find('a')
|
feed_link = item.find('a')
|
||||||
if feed_link and feed_link.has_key('href') and feed_link['href'].startswith('?p='):
|
if feed_link and feed_link.has_key('href') and feed_link['href'].startswith('?p='): # noqa
|
||||||
url = self.INDEX + feed_link['href']
|
url = self.INDEX + feed_link['href']
|
||||||
title = title_prefix + self.tag_to_string(feed_link)
|
title = title_prefix + self.tag_to_string(feed_link)
|
||||||
date = strftime(self.timefmt)
|
date = strftime(self.timefmt)
|
||||||
articles.append({
|
articles.append({
|
||||||
'title' :title
|
'title': title, 'date': date, 'url': url, 'description': description
|
||||||
,'date' :date
|
})
|
||||||
,'url' :url
|
|
||||||
,'description':description
|
|
||||||
})
|
|
||||||
return [(soup.head.title.string, articles)]
|
return [(soup.head.title.string, articles)]
|
||||||
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
return self.adeify_images(soup)
|
return self.adeify_images(soup)
|
||||||
|
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
axxon.com.ar
|
axxon.com.ar
|
||||||
@ -8,55 +8,50 @@ axxon.com.ar
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import Tag
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
|
||||||
|
|
||||||
class Axxon_news(BasicNewsRecipe):
|
class Axxon_news(BasicNewsRecipe):
|
||||||
title = 'Axxon noticias'
|
title = 'Axxon noticias'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Axxon, Ciencia Ficcion en Bits'
|
description = 'Axxon, Ciencia Ficcion en Bits'
|
||||||
publisher = 'Axxon'
|
publisher = 'Axxon'
|
||||||
category = 'news, SF, Argentina, science, movies'
|
category = 'news, SF, Argentina, science, movies'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = False
|
no_stylesheets = False
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'es_AR'
|
language = 'es_AR'
|
||||||
|
|
||||||
lang = 'es-AR'
|
lang = 'es-AR'
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment': description, 'tags': category, 'publisher': publisher, 'language': lang, 'pretty_print': True
|
||||||
, 'tags' : category
|
}
|
||||||
, 'publisher' : publisher
|
|
||||||
, 'language' : lang
|
|
||||||
, 'pretty_print' : True
|
|
||||||
}
|
|
||||||
|
|
||||||
|
keep_only_tags = [dict(name='div', attrs={'class': 'post'})]
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':'post'})]
|
remove_tags = [dict(name=['object', 'link', 'iframe', 'embed'])]
|
||||||
|
|
||||||
remove_tags = [dict(name=['object','link','iframe','embed'])]
|
feeds = [(u'Noticias', u'http://axxon.com.ar/noticias/feed/')]
|
||||||
|
|
||||||
feeds = [(u'Noticias', u'http://axxon.com.ar/noticias/feed/')]
|
|
||||||
|
|
||||||
remove_attributes = ['style','width','height','font','border','align']
|
|
||||||
|
|
||||||
|
remove_attributes = ['style', 'width', 'height', 'font', 'border', 'align']
|
||||||
|
|
||||||
def adeify_images2(cls, soup):
|
def adeify_images2(cls, soup):
|
||||||
for item in soup.findAll('img'):
|
for item in soup.findAll('img'):
|
||||||
for attrib in ['height','width','border','align','style']:
|
for attrib in ['height', 'width', 'border', 'align', 'style']:
|
||||||
if item.has_key(attrib):
|
if item.has_key(attrib): # noqa
|
||||||
del item[attrib]
|
del item[attrib]
|
||||||
oldParent = item.parent
|
oldParent = item.parent
|
||||||
if oldParent.name == 'a':
|
if oldParent.name == 'a':
|
||||||
oldParent.name == 'p'
|
oldParent.name == 'p'
|
||||||
myIndex = oldParent.contents.index(item)
|
myIndex = oldParent.contents.index(item)
|
||||||
brtag = Tag(soup,'br')
|
brtag = Tag(soup, 'br')
|
||||||
oldParent.insert(myIndex+1,brtag)
|
oldParent.insert(myIndex + 1, brtag)
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
soup.html['xml:lang'] = self.lang
|
soup.html['xml:lang'] = self.lang
|
||||||
soup.html['lang'] = self.lang
|
soup.html['lang'] = self.lang
|
||||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
mlang = Tag(soup, 'meta', [
|
||||||
soup.html.insert(0,mlang)
|
("http-equiv", "Content-Language"), ("content", self.lang)])
|
||||||
|
soup.html.insert(0, mlang)
|
||||||
return self.adeify_images2(soup)
|
return self.adeify_images2(soup)
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
azstarnet.com
|
azstarnet.com
|
||||||
@ -7,53 +7,47 @@ azstarnet.com
|
|||||||
import urllib
|
import urllib
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class Azstarnet(BasicNewsRecipe):
|
class Azstarnet(BasicNewsRecipe):
|
||||||
title = 'Arizona Daily Star'
|
title = 'Arizona Daily Star'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'news from Arizona'
|
description = 'news from Arizona'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
publisher = 'azstarnet.com'
|
publisher = 'azstarnet.com'
|
||||||
category = 'news, politics, Arizona, USA'
|
category = 'news, politics, Arizona, USA'
|
||||||
oldest_article = 3
|
oldest_article = 3
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
masthead_url = 'http://azstarnet.com/content/tncms/live/global/resources/images/logo.gif'
|
masthead_url = 'http://azstarnet.com/content/tncms/live/global/resources/images/logo.gif'
|
||||||
needs_subscription = True
|
needs_subscription = True
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||||
, 'tags' : category
|
}
|
||||||
, 'publisher' : publisher
|
|
||||||
, 'language' : language
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def get_browser(self):
|
def get_browser(self):
|
||||||
br = BasicNewsRecipe.get_browser(self)
|
br = BasicNewsRecipe.get_browser(self)
|
||||||
br.open('http://azstarnet.com/')
|
br.open('http://azstarnet.com/')
|
||||||
if self.username is not None and self.password is not None:
|
if self.username is not None and self.password is not None:
|
||||||
data = urllib.urlencode({ 'm':'login'
|
data = urllib.urlencode({'m': 'login', 'u': self.username, 'p': self.password, 'z': 'http://azstarnet.com/'
|
||||||
,'u':self.username
|
})
|
||||||
,'p':self.password
|
br.open('http://azstarnet.com/app/registration/proxy.php', data)
|
||||||
,'z':'http://azstarnet.com/'
|
|
||||||
})
|
|
||||||
br.open('http://azstarnet.com/app/registration/proxy.php',data)
|
|
||||||
return br
|
return br
|
||||||
|
|
||||||
remove_tags = [dict(name=['object','link','iframe','base','img'])]
|
remove_tags = [dict(name=['object', 'link', 'iframe', 'base', 'img'])]
|
||||||
|
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Local News' , u'http://azstarnet.com/search/?f=rss&t=article&c=news/local&l=25&s=start_time&sd=desc')
|
|
||||||
,(u'National News' , u'http://azstarnet.com/search/?f=rss&t=article&c=news/national&l=25&s=start_time&sd=desc')
|
(u'Local News', u'http://azstarnet.com/search/?f=rss&t=article&c=news/local&l=25&s=start_time&sd=desc'),
|
||||||
,(u'World News' , u'http://azstarnet.com/search/?f=rss&t=article&c=news/world&l=25&s=start_time&sd=desc')
|
(u'National News', u'http://azstarnet.com/search/?f=rss&t=article&c=news/national&l=25&s=start_time&sd=desc'),
|
||||||
,(u'Sports' , u'http://azstarnet.com/search/?f=rss&t=article&c=sports&l=25&s=start_time&sd=desc')
|
(u'World News', u'http://azstarnet.com/search/?f=rss&t=article&c=news/world&l=25&s=start_time&sd=desc'),
|
||||||
,(u'Opinion' , u'http://azstarnet.com/search/?f=rss&t=article&c=news/opinion&l=25&s=start_time&sd=desc')
|
(u'Sports', u'http://azstarnet.com/search/?f=rss&t=article&c=sports&l=25&s=start_time&sd=desc'),
|
||||||
,(u'Movies' , u'http://azstarnet.com/search/?f=rss&t=article&c=entertainment/movies&l=25&s=start_time&sd=desc')
|
(u'Opinion', u'http://azstarnet.com/search/?f=rss&t=article&c=news/opinion&l=25&s=start_time&sd=desc'),
|
||||||
,(u'Food' , u'http://azstarnet.com/search/?f=rss&t=article&c=lifestyles/food-and-cooking&l=25&s=start_time&sd=desc')
|
(u'Movies', u'http://azstarnet.com/search/?f=rss&t=article&c=entertainment/movies&l=25&s=start_time&sd=desc'),
|
||||||
]
|
(u'Food', u'http://azstarnet.com/search/?f=rss&t=article&c=lifestyles/food-and-cooking&l=25&s=start_time&sd=desc')
|
||||||
|
]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
@ -62,4 +56,3 @@ class Azstarnet(BasicNewsRecipe):
|
|||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url + '?print=1'
|
return url + '?print=1'
|
||||||
|
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||||
'''
|
'''
|
||||||
b365.realitatea.net
|
b365.realitatea.net
|
||||||
@ -8,45 +8,40 @@ b365.realitatea.net
|
|||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class b365Realitatea(BasicNewsRecipe):
|
class b365Realitatea(BasicNewsRecipe):
|
||||||
title = u'b365 Realitatea'
|
title = u'b365 Realitatea'
|
||||||
__author__ = u'Silviu Cotoar\u0103'
|
__author__ = u'Silviu Cotoar\u0103'
|
||||||
publisher = u'b365 Realitatea'
|
publisher = u'b365 Realitatea'
|
||||||
description = u'b365 Realitatea'
|
description = u'b365 Realitatea'
|
||||||
oldest_article = 5
|
oldest_article = 5
|
||||||
language = 'ro'
|
language = 'ro'
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
category = 'Ziare,Romania,Bucuresti'
|
category = 'Ziare,Romania,Bucuresti'
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
cover_url = 'http://b365.realitatea.net/wp-content/themes/b/images/b365-logo.png'
|
cover_url = 'http://b365.realitatea.net/wp-content/themes/b/images/b365-logo.png'
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comments' : description
|
'comments': description, 'tags': category, 'language': language, 'publisher': publisher
|
||||||
,'tags' : category
|
}
|
||||||
,'language' : language
|
|
||||||
,'publisher' : publisher
|
|
||||||
}
|
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'class':'newsArticle'})
|
dict(name='div', attrs={'class': 'newsArticle'})
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'class':'date'})
|
dict(name='div', attrs={'class': 'date'}), dict(name='dic', attrs={'class': 'addthis_toolbox addthis_default_style'}), dict(
|
||||||
, dict(name='dic', attrs={'class':'addthis_toolbox addthis_default_style'})
|
name='div', attrs={'class': 'related_posts'}), dict(name='div', attrs={'id': 'RelevantiWidget'})
|
||||||
, dict(name='div', attrs={'class':'related_posts'})
|
]
|
||||||
, dict(name='div', attrs={'id':'RelevantiWidget'})
|
|
||||||
]
|
|
||||||
|
|
||||||
remove_tags_after = [
|
remove_tags_after = [
|
||||||
dict(name='div', attrs={'id':'RelevantiWidget'})
|
dict(name='div', attrs={'id': 'RelevantiWidget'})
|
||||||
]
|
]
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'\u0218tiri', u'http://b365.realitatea.net/rss-full/')
|
(u'\u0218tiri', u'http://b365.realitatea.net/rss-full/')
|
||||||
]
|
]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
return self.adeify_images(soup)
|
return self.adeify_images(soup)
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008-2012, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
b92.net
|
b92.net
|
||||||
@ -7,63 +7,63 @@ b92.net
|
|||||||
import re
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class B92(BasicNewsRecipe):
|
class B92(BasicNewsRecipe):
|
||||||
title = 'B92'
|
title = 'B92'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Najnovije vesti iz Srbije, regiona i sveta, aktuelne teme iz sveta politike, ekonomije, drustva, foto galerija, kolumne'
|
description = 'Najnovije vesti iz Srbije, regiona i sveta, aktuelne teme iz sveta politike, ekonomije, drustva, foto galerija, kolumne'
|
||||||
publisher = 'B92'
|
publisher = 'B92'
|
||||||
category = 'news, politics, Serbia'
|
category = 'news, politics, Serbia'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'cp1250'
|
encoding = 'cp1250'
|
||||||
language = 'sr'
|
language = 'sr'
|
||||||
publication_type = 'newsportal'
|
publication_type = 'newsportal'
|
||||||
masthead_url = 'http://b92s.net/v4/img/new-logo.png'
|
masthead_url = 'http://b92s.net/v4/img/new-logo.png'
|
||||||
extra_css = """
|
extra_css = """
|
||||||
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
|
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
|
||||||
body{font-family: Arial,Helvetica,sans1,sans-serif}
|
body{font-family: Arial,Helvetica,sans1,sans-serif}
|
||||||
.article-info2,.article-info1{text-transform: uppercase; font-size: small}
|
.article-info2,.article-info1{text-transform: uppercase; font-size: small}
|
||||||
img{display: block}
|
img{display: block}
|
||||||
.sms{font-weight: bold}
|
.sms{font-weight: bold}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
conversion_options = {
|
|
||||||
'comment' : description
|
|
||||||
, 'tags' : category
|
|
||||||
, 'publisher': publisher
|
|
||||||
, 'language' : language
|
|
||||||
, 'linearize_tables' : True
|
|
||||||
}
|
|
||||||
|
|
||||||
preprocess_regexps = [
|
|
||||||
(re.compile(u'\u0110'), lambda match: u'\u00D0'),
|
|
||||||
(re.compile(r'<html.*?<body>', re.DOTALL|re.IGNORECASE), lambda match: '<html><head><title>something</title></head><body>')
|
|
||||||
]
|
|
||||||
|
|
||||||
keep_only_tags = [dict(attrs={'class':['article-info1','article-text']})]
|
|
||||||
remove_attributes = ['width','height','align','hspace','vspace','border','lang','xmlns:fb']
|
|
||||||
remove_tags = [
|
|
||||||
dict(name=['embed','link','base','meta','iframe'])
|
|
||||||
,dict(attrs={'id':'social'})
|
|
||||||
]
|
|
||||||
|
|
||||||
feeds = [
|
conversion_options = {
|
||||||
(u'Vesti' , u'http://www.b92.net/info/rss/vesti.xml' )
|
'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'linearize_tables': True
|
||||||
,(u'Biz' , u'http://www.b92.net/info/rss/biz.xml' )
|
}
|
||||||
,(u'Sport' , u'http://www.b92.net/info/rss/sport.xml' )
|
|
||||||
,(u'Zivot' , u'http://www.b92.net/info/rss/zivot.xml' )
|
preprocess_regexps = [
|
||||||
,(u'Kultura' , u'http://www.b92.net/info/rss/kultura.xml' )
|
(re.compile(u'\u0110'), lambda match: u'\u00D0'),
|
||||||
,(u'Automobili' , u'http://www.b92.net/info/rss/automobili.xml')
|
(re.compile(r'<html.*?<body>', re.DOTALL | re.IGNORECASE),
|
||||||
,(u'Tehnopolis' , u'http://www.b92.net/info/rss/tehnopolis.xml')
|
lambda match: '<html><head><title>something</title></head><body>')
|
||||||
]
|
]
|
||||||
|
|
||||||
|
keep_only_tags = [dict(attrs={'class': ['article-info1', 'article-text']})]
|
||||||
|
remove_attributes = ['width', 'height', 'align',
|
||||||
|
'hspace', 'vspace', 'border', 'lang', 'xmlns:fb']
|
||||||
|
remove_tags = [
|
||||||
|
dict(name=['embed', 'link', 'base', 'meta', 'iframe']), dict(
|
||||||
|
attrs={'id': 'social'})
|
||||||
|
]
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
|
||||||
|
(u'Vesti', u'http://www.b92.net/info/rss/vesti.xml'),
|
||||||
|
(u'Biz', u'http://www.b92.net/info/rss/biz.xml'),
|
||||||
|
(u'Sport', u'http://www.b92.net/info/rss/sport.xml'),
|
||||||
|
(u'Zivot', u'http://www.b92.net/info/rss/zivot.xml'),
|
||||||
|
(u'Kultura', u'http://www.b92.net/info/rss/kultura.xml'),
|
||||||
|
(u'Automobili', u'http://www.b92.net/info/rss/automobili.xml'),
|
||||||
|
(u'Tehnopolis', u'http://www.b92.net/info/rss/tehnopolis.xml')
|
||||||
|
]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
for alink in soup.findAll('a'):
|
for alink in soup.findAll('a'):
|
||||||
if alink.string is not None:
|
if alink.string is not None:
|
||||||
tstr = alink.string
|
tstr = alink.string
|
||||||
alink.replaceWith(tstr)
|
alink.replaceWith(tstr)
|
||||||
return soup
|
return soup
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user