Update various Polish recipes, and add new ones for fronda and runa by Tomasz Długosz and Mori, respectively.

This commit is contained in:
Kovid Goyal 2010-03-13 13:52:50 -07:00
parent 8c5fbf475f
commit 2c20e69d63
18 changed files with 169 additions and 79 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 733 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 401 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 475 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 626 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 626 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 808 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 KiB

View File

@ -15,8 +15,8 @@ class DziennikInternautowRecipe(BasicNewsRecipe):
language = 'pl' language = 'pl'
title = u'Dziennik Internautow' title = u'Dziennik Internautow'
publisher = u'Dziennik Internaut\xc3\xb3w Sp. z o.o.' publisher = u'Dziennik Internaut\u00f3w Sp. z o.o.'
description =u'Internet w \xc5\xbcyciu i biznesie. Porady, wywiady, interwencje, bezpiecze\xc5\x84stwo w Sieci, technologia.' description = u'Internet w \u017cyciu i biznesie. Porady, wywiady, interwencje, bezpiecze\u0144stwo w Sieci, technologia.'
max_articles_per_feed = 100 max_articles_per_feed = 100
oldest_article = 7 oldest_article = 7
@ -34,7 +34,7 @@ class DziennikInternautowRecipe(BasicNewsRecipe):
''' '''
feeds = [ feeds = [
(u'Dziennik Internautów', u'http://feeds.feedburner.com/glowny-di') (u'Dziennik Internaut\u00f3w', u'http://feeds.feedburner.com/glowny-di')
] ]
keep_only_tags = [ keep_only_tags = [

View File

@ -1,6 +1,6 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__author__ = 'Mori' __author__ = 'Mori'
__version__ = 'v. 0.1' __version__ = 'v. 0.1'
''' '''
@ -11,39 +11,39 @@ from calibre.web.feeds.news import BasicNewsRecipe
import re import re
class BlogeClictoRecipe(BasicNewsRecipe): class BlogeClictoRecipe(BasicNewsRecipe):
__author__ = 'Mori' __author__ = 'Mori'
language = 'pl' language = 'pl'
title = u'Blog eClicto' title = u'Blog eClicto'
publisher = u'Blog eClicto' publisher = u'Blog eClicto'
description = u'Blog o e-papierze i e-bookach' description = u'Blog o e-papierze i e-bookach'
max_articles_per_feed = 100 max_articles_per_feed = 100
cover_url = 'http://blog.eclicto.pl/wordpress/wp-content/themes/blog_eclicto/g/logo.gif' cover_url = 'http://blog.eclicto.pl/wordpress/wp-content/themes/blog_eclicto/g/logo.gif'
no_stylesheets = True no_stylesheets = True
remove_javascript = True remove_javascript = True
encoding = 'utf-8' encoding = 'utf-8'
extra_css = ''' extra_css = '''
img{float: left; padding-right: 10px; padding-bottom: 5px;} img{float: left; padding-right: 10px; padding-bottom: 5px;}
''' '''
feeds = [ feeds = [
(u'Blog eClicto', u'http://blog.eclicto.pl/feed/') (u'Blog eClicto', u'http://blog.eclicto.pl/feed/')
] ]
remove_tags = [ remove_tags = [
dict(name = 'span', attrs = {'id' : 'tags'}) dict(name = 'span', attrs = {'id' : 'tags'})
] ]
remove_tags_after = [ remove_tags_after = [
dict(name = 'div', attrs = {'class' : 'post'}) dict(name = 'div', attrs = {'class' : 'post'})
] ]
preprocess_regexps = [ preprocess_regexps = [
(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
[ [
(r'\s*</', lambda match: '</'), (r'\s*</', lambda match: '</'),
] ]
] ]

View File

@ -11,7 +11,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class eksiazki(BasicNewsRecipe): class eksiazki(BasicNewsRecipe):
title = u'eKsiazki.org' title = u'eKsiazki.org'
desciption = u'Twoje centrum wiedzy o ePapierze i eBookach' description = u'Twoje centrum wiedzy o ePapierze i eBookach'
language = 'pl' language = 'pl'
__author__ = u'Tomasz D\u0142ugosz' __author__ = u'Tomasz D\u0142ugosz'
no_stylesheets = True no_stylesheets = True

View File

@ -0,0 +1,34 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2010, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
fronda.pl
'''
from calibre.web.feeds.news import BasicNewsRecipe
import re
class Fronda(BasicNewsRecipe):
    """Recipe for the fronda.pl news feed (http://fronda.pl/news/feed)."""

    # --- recipe metadata ---
    __author__ = u'Tomasz D\u0142ugosz'
    title = u'Fronda.pl'
    publisher = u'Fronda.pl'
    description = u'Portal po\u015bwi\u0119cony - Infformacje'
    language = 'pl'

    # --- feed source and download limits ---
    feeds = [(u'Infformacje', u'http://fronda.pl/news/feed')]
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False

    # Tag selectors for the article heading, the "about" list and the
    # content container.
    keep_only_tags = [
        {'name': 'h1', 'attrs': {'class': 'big'}},
        {'name': 'ul', 'attrs': {'class': 'about clear'}},
        {'name': 'div', 'attrs': {'class': 'content'}},
    ]

    # (compiled pattern, replacement callable) pairs. Every pattern is
    # compiled case-insensitively and with DOTALL, so '.' also matches
    # newlines.
    # NOTE(review): the "sklepy", "pasaz" and "lektury" patterns use a
    # greedy '.*'; together with DOTALL each match extends to the LAST
    # occurrence of its closing markup in the page - confirm this is
    # intended.
    preprocess_regexps = [
        # Drop the "print" link.
        (re.compile(r'<a href="#" class="print">Drukuj</a>',
                    re.IGNORECASE | re.DOTALL),
         lambda match: ''),
        # Drop paragraphs that start with a link to the shop pages.
        (re.compile(r'<p><a href="http://fronda.pl/sklepy">.*</a></p>',
                    re.IGNORECASE | re.DOTALL),
         lambda match: ''),
        (re.compile(r'<p><a href="http://fronda.pl/pasaz">.*</a></p>',
                    re.IGNORECASE | re.DOTALL),
         lambda match: ''),
        # Collapse the trailing "lektury" and "Zobacz t..." sections down
        # to the closing </div> they end with.
        (re.compile(r'<h3><strong>W.* lektury.*</a></p></div>',
                    re.IGNORECASE | re.DOTALL),
         lambda match: '</div>'),
        (re.compile(r'<h3>Zobacz t.*?</div>',
                    re.IGNORECASE | re.DOTALL),
         lambda match: '</div>'),
    ]

View File

@ -10,6 +10,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class InteriaFakty(BasicNewsRecipe): class InteriaFakty(BasicNewsRecipe):
title = u'Interia.pl - Fakty' title = u'Interia.pl - Fakty'
description = u'Fakty ze strony interia.pl'
language = 'pl' language = 'pl'
oldest_article = 7 oldest_article = 7
__author__ = u'Tomasz D\u0142ugosz' __author__ = u'Tomasz D\u0142ugosz'

View File

@ -11,6 +11,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class InteriaSport(BasicNewsRecipe): class InteriaSport(BasicNewsRecipe):
title = u'Interia.pl - Sport' title = u'Interia.pl - Sport'
description = u'Sport ze strony interia.pl'
language = 'pl' language = 'pl'
oldest_article = 7 oldest_article = 7
__author__ = u'Tomasz D\u0142ugosz' __author__ = u'Tomasz D\u0142ugosz'
@ -30,7 +31,8 @@ class InteriaSport(BasicNewsRecipe):
keep_only_tags = [dict(name='div', attrs={'id':'article'})] keep_only_tags = [dict(name='div', attrs={'id':'article'})]
remove_tags = [dict(name='div', attrs={'class':'object gallery'})] remove_tags = [dict(name='div', attrs={'class':'object gallery'}),
dict(name='div', attrs={'class':'box fontSizeSwitch'})]
extra_css = ''' extra_css = '''
.articleDate { .articleDate {

View File

@ -1,6 +1,6 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__author__ = 'Mori' __author__ = 'Mori'
__version__ = 'v. 0.1' __version__ = 'v. 0.1'
''' '''
@ -10,34 +10,34 @@ olgierd.bblog.pl
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class LegeArtisRecipe(BasicNewsRecipe): class LegeArtisRecipe(BasicNewsRecipe):
__author__ = 'Mori' __author__ = 'Mori'
language = 'pl' language = 'pl'
title = u'Lege Artis' title = u'Lege Artis'
publisher = u'Olgierd Rudak' publisher = u'Olgierd Rudak'
description = u'Wszystko, co chcieliby\xc5\x9bcie wiedzie\xc4\x87 o prawie, ale wstydzicie si\xc4\x99 zapyta\xc4\x87' description = u'Wszystko, co chcieliby\u015bcie wiedzie\u0107 o prawie, ale wstydzicie si\u0119 zapyta\u0107'
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
remove_javascript = True remove_javascript = True
extra_css = ''' extra_css = '''
img{clear: both;} img{clear: both;}
''' '''
feeds = [ feeds = [
(u'Lege Artis', u'http://olgierd.bblog.pl/rss/rss20.xml') (u'Lege Artis', u'http://olgierd.bblog.pl/rss/rss20.xml')
] ]
keep_only_tags = [ keep_only_tags = [
dict(name = 'div', attrs = {'class' : 'post_title'}), dict(name = 'div', attrs = {'class' : 'post_title'}),
dict(name = 'div', attrs = {'class' : 'post_date'}), dict(name = 'div', attrs = {'class' : 'post_date'}),
dict(name = 'div', attrs = {'class' : 'post_content'}) dict(name = 'div', attrs = {'class' : 'post_content'})
] ]
remove_tags = [ remove_tags = [
dict(name = 'div', attrs = {'id' : 'bb_tools'}), dict(name = 'div', attrs = {'id' : 'bb_tools'}),
dict(name = 'div', attrs = {'class' : 'post_comments'}), dict(name = 'div', attrs = {'class' : 'post_comments'}),
dict(name = 'object', attrs = {}) dict(name = 'object', attrs = {})
] ]

View File

@ -10,6 +10,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Legitymizm(BasicNewsRecipe): class Legitymizm(BasicNewsRecipe):
title = u'Organizacja Monarchist\xf3w Polskich' title = u'Organizacja Monarchist\xf3w Polskich'
description = u'Portal legitymistyczny'
language = 'pl' language = 'pl'
oldest_article = 7 oldest_article = 7
__author__ = u'Tomasz D\u0142ugosz' __author__ = u'Tomasz D\u0142ugosz'

View File

@ -12,7 +12,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class michalkiewicz(BasicNewsRecipe): class michalkiewicz(BasicNewsRecipe):
title = u'Stanis\u0142aw Michalkiewicz' title = u'Stanis\u0142aw Michalkiewicz'
desciption = u'Strona autorska * felietony * artyku\u0142y * komentarze' description = u'Strona autorska * felietony * artyku\u0142y * komentarze'
__author__ = u'Tomasz D\u0142ugosz' __author__ = u'Tomasz D\u0142ugosz'
language = 'pl' language = 'pl'
oldest_article = 7 oldest_article = 7

View File

@ -12,7 +12,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class NCzas(BasicNewsRecipe): class NCzas(BasicNewsRecipe):
title = u'Najwy\u017cszy Czas!' title = u'Najwy\u017cszy Czas!'
desciption = u'Najwy\u017cszy Czas!\nwydanie internetowe' description = u'Najwy\u017cszy Czas!\nwydanie internetowe'
__author__ = u'Tomasz D\u0142ugosz' __author__ = u'Tomasz D\u0142ugosz'
language = 'pl' language = 'pl'
oldest_article = 7 oldest_article = 7

View File

@ -0,0 +1,52 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Mori'
__version__ = 'v. 0.1'
'''
www.runa.pl/blog
'''
from calibre.web.feeds.news import BasicNewsRecipe
import re
class FantazmatyRecipe(BasicNewsRecipe):
    """Recipe for the "Fantazmaty" blog feed at www.runa.pl/blog."""

    # --- recipe metadata ---
    title = u'Fantazmaty'
    publisher = u'Agencja Wydawnicza Runa'
    description = u'Blog Agencji Wydawniczej Runa'
    language = 'pl'
    __author__ = 'Mori'

    # --- feed source and download limits ---
    feeds = [(u'Fantazmaty', u'http://www.runa.pl/blog/rss.xml')]
    oldest_article = 100
    max_articles_per_feed = 100
    encoding = 'utf-8'

    # --- output clean-up and styling ---
    no_stylesheets = True
    remove_javascript = True
    extra_css = '''
img{float: left; padding-right: 10px; padding-bottom: 5px;}
'''

    # Tag selectors for the breadcrumb path, separator and picture blocks.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': cls}}
        for cls in ('path', 'drdot', 'picture')
    ]

    remove_tags_after = [
        {'name': 'div', 'attrs': {'class': 'content'}},
    ]

    # (compiled pattern, replacement callable) pairs. Every pattern is
    # compiled case-insensitively and with DOTALL, so '.' also matches
    # newlines.
    preprocess_regexps = [
        # Cut everything between <body> and the "primary" container.
        (re.compile(r'<body>.*?<div id="primary"',
                    re.IGNORECASE | re.DOTALL),
         lambda match: '<body><div id="primary"'),
        # Strip HTML comments.
        (re.compile(r'<!--.*?-->',
                    re.IGNORECASE | re.DOTALL),
         lambda match: ''),
    ]