KG changes
BIN
resources/images/news/di.png
Normal file
After Width: | Height: | Size: 733 B |
BIN
resources/images/news/eclicto.png
Normal file
After Width: | Height: | Size: 401 B |
BIN
resources/images/news/eksiazki.png
Normal file
After Width: | Height: | Size: 475 B |
BIN
resources/images/news/interia_fakty.png
Normal file
After Width: | Height: | Size: 626 B |
BIN
resources/images/news/interia_sport.png
Normal file
After Width: | Height: | Size: 626 B |
BIN
resources/images/news/legitymizm.png
Normal file
After Width: | Height: | Size: 808 B |
BIN
resources/images/news/michalkiewicz.png
Normal file
After Width: | Height: | Size: 1.4 KiB |
BIN
resources/images/news/nrc.nl.png
Normal file
After Width: | Height: | Size: 1.8 KiB |
@ -2,18 +2,22 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__author__ = 'Lorenzo Vigentini, based on Darko Miletic'
|
__author__ = 'Lorenzo Vigentini, based on Darko Miletic'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>'
|
||||||
__version__ = 'v1.01'
|
__version__ = 'v1.02'
|
||||||
__date__ = '10, January 2010'
|
__date__ = '14, March 2010'
|
||||||
__description__ = 'Italian daily newspaper (english version)'
|
__description__ = 'Italian daily newspaper (english version)'
|
||||||
|
# NOTE: the feed URLs are broken on the main site because the permalink structure was changed erroneously, e.g.:
|
||||||
|
# actual link in feed http://www.corriere.it/english/10_marzo_11/legitimate_impediment_approved_de9ba480-2cfd-11df-a00c-00144f02aabe.shtml
|
||||||
|
# this needs to be changed to
|
||||||
|
# real feed URL http://www.corriere.it/International/english/articoli/2010/03/11/legitimate_impediment_approved.shtml
|
||||||
'''
|
'''
|
||||||
http://www.corriere.it/
|
http://www.corriere.it/
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class ilCorriere(BasicNewsRecipe):
|
class ilCorriereEn(BasicNewsRecipe):
|
||||||
__author__ = 'Lorenzo Vigentini, based on Darko Miletic'
|
author = 'Lorenzo Vigentini, based on Darko Miletic'
|
||||||
description = 'Italian daily newspaper (english version)'
|
description = 'Italian daily newspaper (english version)'
|
||||||
|
|
||||||
cover_url = 'http://images.corriereobjects.it/images/static/common/logo_home.gif?v=200709121520'
|
cover_url = 'http://images.corriereobjects.it/images/static/common/logo_home.gif?v=200709121520'
|
||||||
title = u'Il Corriere della sera (english) '
|
title = u'Il Corriere della sera (english) '
|
||||||
@ -23,7 +27,7 @@ class ilCorriere(BasicNewsRecipe):
|
|||||||
language = 'en'
|
language = 'en'
|
||||||
timefmt = '[%a, %d %b, %Y]'
|
timefmt = '[%a, %d %b, %Y]'
|
||||||
|
|
||||||
oldest_article = 1
|
oldest_article = 5
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
recursion = 10
|
recursion = 10
|
||||||
@ -31,14 +35,30 @@ class ilCorriere(BasicNewsRecipe):
|
|||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
|
||||||
html2lrf_options = [
|
def get_article_url(self, article):
    """Rewrite a feed link into the site's working article permalink.

    The feed publishes links shaped like
        http://host/english/10_marzo_11/some_title_de9ba480-2cfd-....shtml
    which are rewritten to
        http://host/International/english/articoli/2010/03/11/some_title.shtml

    :param article: feed entry; only ``article.get('link')`` is read.
    :return: the rewritten URL, or the original link unchanged when it is
        missing or does not have the expected structure.
    """
    article_url = article.get('link')
    if not article_url:
        return article_url

    # Italian month name (as used in the feed link) -> two-digit month.
    month_numbers = {
        'gennaio': '01', 'febbraio': '02', 'marzo': '03',
        'aprile': '04', 'maggio': '05', 'giugno': '06',
        'luglio': '07', 'agosto': '08', 'settembre': '09',
        'ottobre': '10', 'novembre': '11', 'dicembre': '12',
    }

    segments = article_url.split('/')
    try:
        # Fifth path piece is the date, e.g. '10_marzo_11' (yy_month_dd).
        year, month_name, day = segments[4].split('_')[:3]
        month = month_numbers[month_name]
        # Sixth piece is '<title>_<8 hex chars>-<rest>.shtml'; keep the text
        # before the first '-' and drop the trailing 9 chars ('_' + hash).
        slug = segments[5].split('-')[0][:-9]
    except (IndexError, KeyError, ValueError):
        # Unexpected link layout: fall back to the link as published
        # instead of failing the whole download.
        return article_url

    # scheme://host of the original link, followed by the fixed path prefix.
    base = '/'.join(segments[:3]) + '/' + 'International/english/articoli/'
    return base + '20' + year + '/' + month + '/' + day + '/' + slug + '.shtml'
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':['news-dettaglio article','article']})]
|
keep_only_tags = [dict(name='div', attrs={'class':['news-dettaglio article','article']})]
|
||||||
|
|
||||||
|
@ -15,42 +15,42 @@ class DziennikInternautowRecipe(BasicNewsRecipe):
|
|||||||
language = 'pl'
|
language = 'pl'
|
||||||
|
|
||||||
title = u'Dziennik Internautow'
|
title = u'Dziennik Internautow'
|
||||||
publisher = u'Dziennik Internaut\xc3\xb3w Sp. z o.o.'
|
publisher = u'Dziennik Internaut\u00f3w Sp. z o.o.'
|
||||||
description =u'Internet w \xc5\xbcyciu i biznesie. Porady, wywiady, interwencje, bezpiecze\xc5\x84stwo w Sieci, technologia.'
|
description = u'Internet w \u017cyciu i biznesie. Porady, wywiady, interwencje, bezpiecze\u0144stwo w Sieci, technologia.'
|
||||||
|
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
cover_url = 'http://di.com.pl/pic/logo_di_norm.gif'
|
cover_url = 'http://di.com.pl/pic/logo_di_norm.gif'
|
||||||
|
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
|
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
.fotodesc{font-size: 75%;}
|
.fotodesc{font-size: 75%;}
|
||||||
.pub_data{font-size: 75%;}
|
.pub_data{font-size: 75%;}
|
||||||
.fotonews{clear: both; padding-top: 10px; padding-bottom: 10px;}
|
.fotonews{clear: both; padding-top: 10px; padding-bottom: 10px;}
|
||||||
#pub_foto{font-size: 75%; float: left; padding-right: 10px;}
|
#pub_foto{font-size: 75%; float: left; padding-right: 10px;}
|
||||||
'''
|
'''
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Dziennik Internautów', u'http://feeds.feedburner.com/glowny-di')
|
(u'Dziennik Internaut\u00f3w', u'http://feeds.feedburner.com/glowny-di')
|
||||||
]
|
]
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name = 'div', attrs = {'id' : 'pub_head'}),
|
dict(name = 'div', attrs = {'id' : 'pub_head'}),
|
||||||
dict(name = 'div', attrs = {'id' : 'pub_content'})
|
dict(name = 'div', attrs = {'id' : 'pub_content'})
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name = 'div', attrs = {'class' : 'poradniki_context'}),
|
dict(name = 'div', attrs = {'class' : 'poradniki_context'}),
|
||||||
dict(name = 'div', attrs = {'class' : 'uniBox'}),
|
dict(name = 'div', attrs = {'class' : 'uniBox'}),
|
||||||
dict(name = 'object', attrs = {}),
|
dict(name = 'object', attrs = {}),
|
||||||
dict(name = 'h3', attrs = {})
|
dict(name = 'h3', attrs = {})
|
||||||
]
|
]
|
||||||
|
|
||||||
preprocess_regexps = [
|
preprocess_regexps = [
|
||||||
(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
|
(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
|
||||||
[
|
[
|
||||||
(r', <a href="http://di.com.pl/komentarze,.*?</div>', lambda match: '</div>'),
|
(r', <a href="http://di.com.pl/komentarze,.*?</div>', lambda match: '</div>'),
|
||||||
(r'<div class="fotonews".*?">', lambda match: '<div class="fotonews">'),
|
(r'<div class="fotonews".*?">', lambda match: '<div class="fotonews">'),
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__author__ = 'Mori'
|
__author__ = 'Mori'
|
||||||
__version__ = 'v. 0.1'
|
__version__ = 'v. 0.1'
|
||||||
'''
|
'''
|
||||||
@ -11,39 +11,39 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
class BlogeClictoRecipe(BasicNewsRecipe):
|
class BlogeClictoRecipe(BasicNewsRecipe):
|
||||||
__author__ = 'Mori'
|
__author__ = 'Mori'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
|
|
||||||
title = u'Blog eClicto'
|
title = u'Blog eClicto'
|
||||||
publisher = u'Blog eClicto'
|
publisher = u'Blog eClicto'
|
||||||
description = u'Blog o e-papierze i e-bookach'
|
description = u'Blog o e-papierze i e-bookach'
|
||||||
|
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
cover_url = 'http://blog.eclicto.pl/wordpress/wp-content/themes/blog_eclicto/g/logo.gif'
|
cover_url = 'http://blog.eclicto.pl/wordpress/wp-content/themes/blog_eclicto/g/logo.gif'
|
||||||
|
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
|
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
img{float: left; padding-right: 10px; padding-bottom: 5px;}
|
img{float: left; padding-right: 10px; padding-bottom: 5px;}
|
||||||
'''
|
'''
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Blog eClicto', u'http://blog.eclicto.pl/feed/')
|
(u'Blog eClicto', u'http://blog.eclicto.pl/feed/')
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name = 'span', attrs = {'id' : 'tags'})
|
dict(name = 'span', attrs = {'id' : 'tags'})
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags_after = [
|
remove_tags_after = [
|
||||||
dict(name = 'div', attrs = {'class' : 'post'})
|
dict(name = 'div', attrs = {'class' : 'post'})
|
||||||
]
|
]
|
||||||
|
|
||||||
preprocess_regexps = [
|
preprocess_regexps = [
|
||||||
(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
|
(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
|
||||||
[
|
[
|
||||||
(r'\s*</', lambda match: '</'),
|
(r'\s*</', lambda match: '</'),
|
||||||
]
|
]
|
||||||
]
|
]
|
@ -11,7 +11,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
class eksiazki(BasicNewsRecipe):
|
class eksiazki(BasicNewsRecipe):
|
||||||
|
|
||||||
title = u'eKsiazki.org'
|
title = u'eKsiazki.org'
|
||||||
desciption = u'Twoje centrum wiedzy o ePapierze i eBookach'
|
description = u'Twoje centrum wiedzy o ePapierze i eBookach'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
__author__ = u'Tomasz D\u0142ugosz'
|
__author__ = u'Tomasz D\u0142ugosz'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
34
resources/recipes/fronda.recipe
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = u'2010, Tomasz Dlugosz <tomek3d@gmail.com>'
|
||||||
|
'''
|
||||||
|
fronda.pl
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
import re
|
||||||
|
|
||||||
|
class Fronda(BasicNewsRecipe):
    """Recipe settings for the Fronda.pl news feed.

    The class only declares attributes; how each one is interpreted is
    defined by BasicNewsRecipe.
    """

    # Recipe metadata.
    __author__ = u'Tomasz D\u0142ugosz'
    title = u'Fronda.pl'
    publisher = u'Fronda.pl'
    description = u'Portal po\u015bwi\u0119cony - Infformacje'
    language = 'pl'

    # Fetch limits.
    max_articles_per_feed = 100
    oldest_article = 7
    use_embedded_content = False

    # One (title, URL) pair per feed.
    feeds = [
        (u'Infformacje', u'http://fronda.pl/news/feed'),
    ]

    # Tag selectors: h1.big, ul with class "about clear", div.content.
    keep_only_tags = [
        {'name': 'h1', 'attrs': {'class': 'big'}},
        {'name': 'ul', 'attrs': {'class': 'about clear'}},
        {'name': 'div', 'attrs': {'class': 'content'}},
    ]

    # (compiled pattern, replacement callable) pairs; every pattern is
    # compiled with IGNORECASE and DOTALL.
    preprocess_regexps = [
        (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1])
        for i in (
            # The "Drukuj" link with class "print" is removed.
            (r'<a href="#" class="print">Drukuj</a>', lambda match: ''),
            # Paragraphs linking to /sklepy and /pasaz are removed.
            (r'<p><a href="http://fronda.pl/sklepy">.*</a></p>', lambda match: ''),
            (r'<p><a href="http://fronda.pl/pasaz">.*</a></p>', lambda match: ''),
            # Everything from these headings up to the matched closing div
            # is collapsed to the bare closing tag.
            (r'<h3><strong>W.* lektury.*</a></p></div>', lambda match: '</div>'),
            (r'<h3>Zobacz t.*?</div>', lambda match: '</div>'),
        )
    ]
|
@ -10,6 +10,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
|
|
||||||
class InteriaFakty(BasicNewsRecipe):
|
class InteriaFakty(BasicNewsRecipe):
|
||||||
title = u'Interia.pl - Fakty'
|
title = u'Interia.pl - Fakty'
|
||||||
|
description = u'Fakty ze strony interia.pl'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
__author__ = u'Tomasz D\u0142ugosz'
|
__author__ = u'Tomasz D\u0142ugosz'
|
||||||
|
@ -11,6 +11,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
|
|
||||||
class InteriaSport(BasicNewsRecipe):
|
class InteriaSport(BasicNewsRecipe):
|
||||||
title = u'Interia.pl - Sport'
|
title = u'Interia.pl - Sport'
|
||||||
|
description = u'Sport ze strony interia.pl'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
__author__ = u'Tomasz D\u0142ugosz'
|
__author__ = u'Tomasz D\u0142ugosz'
|
||||||
@ -30,7 +31,8 @@ class InteriaSport(BasicNewsRecipe):
|
|||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'article'})]
|
keep_only_tags = [dict(name='div', attrs={'id':'article'})]
|
||||||
|
|
||||||
remove_tags = [dict(name='div', attrs={'class':'object gallery'})]
|
remove_tags = [dict(name='div', attrs={'class':'object gallery'}),
|
||||||
|
dict(name='div', attrs={'class':'box fontSizeSwitch'})]
|
||||||
|
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
.articleDate {
|
.articleDate {
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__author__ = 'Mori'
|
__author__ = 'Mori'
|
||||||
__version__ = 'v. 0.1'
|
__version__ = 'v. 0.1'
|
||||||
'''
|
'''
|
||||||
@ -10,34 +10,34 @@ olgierd.bblog.pl
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class LegeArtisRecipe(BasicNewsRecipe):
|
class LegeArtisRecipe(BasicNewsRecipe):
|
||||||
__author__ = 'Mori'
|
__author__ = 'Mori'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
|
|
||||||
title = u'Lege Artis'
|
title = u'Lege Artis'
|
||||||
publisher = u'Olgierd Rudak'
|
publisher = u'Olgierd Rudak'
|
||||||
description = u'Wszystko, co chcieliby\xc5\x9bcie wiedzie\xc4\x87 o prawie, ale wstydzicie si\xc4\x99 zapyta\xc4\x87'
|
description = u'Wszystko, co chcieliby\u015bcie wiedzie\u0107 o prawie, ale wstydzicie si\u0119 zapyta\u0107'
|
||||||
|
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
|
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
|
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
img{clear: both;}
|
img{clear: both;}
|
||||||
'''
|
'''
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Lege Artis', u'http://olgierd.bblog.pl/rss/rss20.xml')
|
(u'Lege Artis', u'http://olgierd.bblog.pl/rss/rss20.xml')
|
||||||
]
|
]
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name = 'div', attrs = {'class' : 'post_title'}),
|
dict(name = 'div', attrs = {'class' : 'post_title'}),
|
||||||
dict(name = 'div', attrs = {'class' : 'post_date'}),
|
dict(name = 'div', attrs = {'class' : 'post_date'}),
|
||||||
dict(name = 'div', attrs = {'class' : 'post_content'})
|
dict(name = 'div', attrs = {'class' : 'post_content'})
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name = 'div', attrs = {'id' : 'bb_tools'}),
|
dict(name = 'div', attrs = {'id' : 'bb_tools'}),
|
||||||
dict(name = 'div', attrs = {'class' : 'post_comments'}),
|
dict(name = 'div', attrs = {'class' : 'post_comments'}),
|
||||||
dict(name = 'object', attrs = {})
|
dict(name = 'object', attrs = {})
|
||||||
]
|
]
|
||||||
|
@ -10,6 +10,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
|
|
||||||
class Legitymizm(BasicNewsRecipe):
|
class Legitymizm(BasicNewsRecipe):
|
||||||
title = u'Organizacja Monarchist\xf3w Polskich'
|
title = u'Organizacja Monarchist\xf3w Polskich'
|
||||||
|
description = u'Portal legitymistyczny'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
__author__ = u'Tomasz D\u0142ugosz'
|
__author__ = u'Tomasz D\u0142ugosz'
|
||||||
|
@ -12,7 +12,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
|
|
||||||
class michalkiewicz(BasicNewsRecipe):
|
class michalkiewicz(BasicNewsRecipe):
|
||||||
title = u'Stanis\u0142aw Michalkiewicz'
|
title = u'Stanis\u0142aw Michalkiewicz'
|
||||||
desciption = u'Strona autorska * felietony * artyku\u0142y * komentarze'
|
description = u'Strona autorska * felietony * artyku\u0142y * komentarze'
|
||||||
__author__ = u'Tomasz D\u0142ugosz'
|
__author__ = u'Tomasz D\u0142ugosz'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
|
@ -12,7 +12,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
|
|
||||||
class NCzas(BasicNewsRecipe):
|
class NCzas(BasicNewsRecipe):
|
||||||
title = u'Najwy\u017cszy Czas!'
|
title = u'Najwy\u017cszy Czas!'
|
||||||
desciption = u'Najwy\u017cszy Czas!\nwydanie internetowe'
|
description = u'Najwy\u017cszy Czas!\nwydanie internetowe'
|
||||||
__author__ = u'Tomasz D\u0142ugosz'
|
__author__ = u'Tomasz D\u0142ugosz'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
|
50
resources/recipes/nrc.nl.recipe
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
nrc.nl
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Pagina12(BasicNewsRecipe):
    """Recipe settings for nrc.nl (published under the title 'NRC').

    NOTE(review): the class name does not match the nrc.nl content and looks
    carried over from another recipe; it is kept because it is the public name.
    """

    # Recipe metadata.
    __author__ = 'Darko Miletic'
    title = 'NRC'
    description = 'News from Netherlands'
    publisher = 'nrc.nl'
    category = 'news, politics, Netherlands'
    language = 'nl'
    country = 'NL'

    # Fetch settings.
    oldest_article = 2
    max_articles_per_feed = 200
    use_embedded_content = False
    remove_empty_feeds = True
    encoding = 'cp1252'

    # Presentation.
    no_stylesheets = True
    masthead_url = 'http://www.nrc.nl/nrc.nl/images/logo_nrc.png'
    extra_css = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} h1,h2,h3{text-align:left} '

    # Built from the metadata attributes above, so it must follow them.
    conversion_options = dict(
        comment=description,
        tags=category,
        publisher=publisher,
        language=language,
    )

    # Tag selector: div with class "article clearfix".
    keep_only_tags = [{'name': 'div', 'attrs': {'class': 'article clearfix'}}]

    # One (title, URL) pair per section feed.
    feeds = [
        (u'Voorpagina', u'http://feeds.feedburner.com/NRCHandelsbladVoorpagina'),
        (u'Binnenland', u'http://feeds.feedburner.com/NRCHandelsbladBinnenland'),
        (u'Buitenland', u'http://feeds.feedburner.com/NRCHandelsbladBuitenland'),
        (u'Economie', u'http://feeds.feedburner.com/NRCHandelsbladEconomie'),
        (u'Kunst & Film', u'http://feeds.feedburner.com/nrc/NRCHandelsbladKunstEnFilm'),
        (u'Sport', u'http://feeds.feedburner.com/NRCHandelsbladSport'),
        (u'Wetenschap ', u'http://www.nrc.nl/rss/wetenschap'),
    ]

    def print_version(self, url):
        """Return ``url`` with the '?service=Print' query string appended."""
        print_suffix = '?service=Print'
        return url + print_suffix

    def preprocess_html(self, soup):
        """Pass ``soup`` through ``self.adeify_images`` and return the result."""
        processed = self.adeify_images(soup)
        return processed
|
52
resources/recipes/runa.recipe
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__author__ = 'Mori'
|
||||||
|
__version__ = 'v. 0.1'
|
||||||
|
'''
|
||||||
|
www.runa.pl/blog
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
import re
|
||||||
|
|
||||||
|
class FantazmatyRecipe(BasicNewsRecipe):
    """Recipe settings for the 'Fantazmaty' blog feed at www.runa.pl/blog.

    The class only declares attributes; how each one is interpreted is
    defined by BasicNewsRecipe.
    """

    # Recipe metadata.
    __author__ = 'Mori'
    title = u'Fantazmaty'
    publisher = u'Agencja Wydawnicza Runa'
    description = u'Blog Agencji Wydawniczej Runa'
    language = 'pl'

    # Fetch settings.
    max_articles_per_feed = 100
    oldest_article = 100
    encoding = 'utf-8'

    # Presentation. The CSS literal is reproduced exactly as shown in the
    # source, hence the flush-left line.
    remove_javascript = True
    no_stylesheets = True
    extra_css = '''
img{float: left; padding-right: 10px; padding-bottom: 5px;}
'''

    # One (title, URL) pair per feed.
    feeds = [
        (u'Fantazmaty', u'http://www.runa.pl/blog/rss.xml'),
    ]

    # Tag selectors: divs with class "path", "drdot" and "picture".
    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'path'}},
        {'name': 'div', 'attrs': {'class': 'drdot'}},
        {'name': 'div', 'attrs': {'class': 'picture'}},
    ]

    # Tag selector: div with class "content".
    remove_tags_after = [
        {'name': 'div', 'attrs': {'class': 'content'}},
    ]

    # (compiled pattern, replacement callable) pairs; every pattern is
    # compiled with IGNORECASE and DOTALL.
    preprocess_regexps = [
        (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1])
        for i in (
            # Drop whatever sits between <body> and the "primary" div.
            (r'<body>.*?<div id="primary"', lambda match: '<body><div id="primary"'),
            # Remove HTML comments.
            (r'<!--.*?-->', lambda match: ''),
        )
    ]
|
@ -1,35 +1,25 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class SanFranciscoBayGuardian(BasicNewsRecipe):
|
class SanFranciscoBayGuardian(BasicNewsRecipe):
|
||||||
title = u'San Francisco Bay Guardian'
|
title = u'San Francisco Bay Guardian'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
__author__ = 'Krittika Goyal'
|
__author__ = 'Krittika Goyal'
|
||||||
oldest_article = 31 #days
|
oldest_article = 31 #days
|
||||||
max_articles_per_feed = 25
|
max_articles_per_feed = 25
|
||||||
#encoding = 'latin1'
|
|
||||||
|
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
#remove_tags_before = dict(name='div', attrs={'id':'story_header'})
|
|
||||||
#remove_tags_after = dict(name='div', attrs={'id':'shirttail'})
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='iframe'),
|
dict(name='iframe'),
|
||||||
#dict(name='div', attrs={'class':'related-articles'}),
|
|
||||||
#dict(name='div', attrs={'id':['story_tools', 'toolbox', 'shirttail', 'comment_widget']}),
|
|
||||||
#dict(name='ul', attrs={'class':'article-tools'}),
|
|
||||||
#dict(name='ul', attrs={'id':'story_tabs'}),
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
('sfbg', 'http://www.sfbg.com/rss.xml'),
|
('sfbg', 'http://www.sfbg.com/rss.xml'),
|
||||||
|
('politics', 'http://www.sfbg.com/politics/rss.xml'),
|
||||||
|
('blogs', 'http://www.sfbg.com/blog/rss.xml'),
|
||||||
|
('pixel_vision', 'http://www.sfbg.com/pixel_vision/rss.xml'),
|
||||||
|
('bruce', 'http://www.sfbg.com/bruce/rss.xml'),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
#def preprocess_html(self, soup):
|
|
||||||
#story = soup.find(name='div', attrs={'id':'story_body'})
|
|
||||||
#td = heading.findParent(name='td')
|
|
||||||
#td.extract()
|
|
||||||
#soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
|
|
||||||
#body = soup.find(name='body')
|
|
||||||
#body.insert(0, story)
|
|
||||||
#return soup
|
|
||||||
|