This commit is contained in:
Kovid Goyal 2015-10-16 20:52:06 +05:30
commit da8e5bffa8
6 changed files with 8 additions and 80 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 500 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 502 B

View File

@ -1,21 +1,19 @@
#!/usr/bin/env python2
__license__ = 'GPL v3'
__author__ = 'Mori'
__version__ = 'v. 0.1'
'''
olgierd.bblog.pl
czasopismo.legeartis.org
'''
from calibre.web.feeds.news import BasicNewsRecipe
class LegeArtisRecipe(BasicNewsRecipe):
__author__ = 'Mori'
__author__ = 'Mori, Tomasz D\u0142ugosz'
language = 'pl'
title = u'Lege Artis'
publisher = u'Olgierd Rudak'
description = u'Wszystko, co chcieliby\u015bcie wiedzie\u0107 o prawie, ale wstydzicie si\u0119 zapyta\u0107'
description = u'Prawie wszystko, co chcieliby\u015bcie wiedzie\u0107 o prawie, ale wstydzili\u015bcie si\u0119 zapyta\u0107'
max_articles_per_feed = 100
@ -27,17 +25,6 @@ class LegeArtisRecipe(BasicNewsRecipe):
'''
feeds = [
(u'Lege Artis', u'http://olgierd.bblog.pl/rss/rss20.xml')
(u'Lege Artis', u'http://czasopismo.legeartis.org/feed')
]
keep_only_tags = [
dict(name = 'div', attrs = {'class' : 'post_title'}),
dict(name = 'div', attrs = {'class' : 'post_date'}),
dict(name = 'div', attrs = {'class' : 'post_content'})
]
remove_tags = [
dict(name = 'div', attrs = {'id' : 'bb_tools'}),
dict(name = 'div', attrs = {'class' : 'post_comments'}),
dict(name = 'object', attrs = {})
]

View File

@ -22,3 +22,7 @@ class lifehacking(BasicNewsRecipe):
no_stylesheets=True
feeds = [(u'Lifehacker polska', u'http://feeds.feedburner.com/pl_lh')]
keep_only_tags = [
dict(name='header', attrs={'class': 'td-post-title'}),
dict(name='div', attrs={'class': 'td-post-content'})]

View File

@ -1,26 +0,0 @@
#!/usr/bin/env python2
__license__ = 'GPL v3'
__copyright__ = 'MrStefan'
'''
www.mobilna.pl
'''
from calibre.web.feeds.news import BasicNewsRecipe
class mobilna(BasicNewsRecipe):
    """Recipe for the Mobilna.pl feed; article bodies come from the feed itself."""

    # --- Catalogue metadata ---
    title = u'Mobilna.pl'
    description = u'twoja mobilna strona'
    language = 'pl'
    __author__ = 'MrStefan <mrstefaan@gmail.com>'
    # No masthead image is configured (previously a disabled: masthead_url = '').

    # --- Feed selection ---
    feeds = [(u'Artykuły', u'http://mobilna.pl/feed/')]
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True

    # --- Content handling ---
    # Embedded feed content is used, so no page-level tag filter is active
    # (previously a disabled: keep_only_tags = [dict(attrs={'class': 'Post'})]).
    use_embedded_content = True
    remove_javascript = True
    no_stylesheets = True

View File

@ -1,37 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Comment
class OCLab(BasicNewsRecipe):
    """Recipe for OCLab.pl that merges paginated posts into a single article."""

    title = u'OCLab.pl'
    oldest_article = 7
    max_articles_per_feed = 100
    __author__ = 'fenuks'
    description = u'Portal OCLab.pl jest miejscem przyjaznym pasjonatom sprzętu komputerowego, w szczególności overclockerom, które będzie służyć im za aktualną bazę wiedzy o podkręcaniu komputera, źródło aktualnych informacji z rynku oraz opinii na temat sprzętu komputerowego.'
    category = 'IT'
    language = 'pl'
    cover_url = 'http://www.idealforum.ru/attachment.php?attachmentid=7963&d=1316008118'
    no_stylesheets = True

    # Keep only the main container, cut everything after the post metadata,
    # and drop the metadata, pager and social-bookmark widgets themselves.
    keep_only_tags = [dict(id='main')]
    remove_tags_after = dict(attrs={'class': 'single-postmetadata'})
    remove_tags = [dict(attrs={'class': ['single-postmetadata', 'pagebar', 'shr-bookmarks shr-bookmarks-expand shr-bookmarks-center shr-bookmarks-bg-enjoy']})]
    feeds = [(u'Wpisy', u'http://oclab.pl/feed/')]

    def append_page(self, soup, appendtag):
        """Append the follow-up pages of a paginated post to ``appendtag``.

        The page list is read from the ``contentjumpddl`` element's
        ``<option>`` entries. Each linked page is fetched with
        ``index_to_soup`` and its ``single-entry`` element is appended.
        Pages without a usable URL or without that element are skipped
        instead of aborting the whole article. Afterwards the bottom
        navigation lists and HTML comments are removed from ``appendtag``.

        :param soup: parsed first page of the article.
        :param appendtag: tag that receives the additional page bodies.
        """
        pager = soup.find(attrs={'class': 'contentjumpddl'})
        if pager is None:
            # Single-page post: nothing to fetch, nothing to clean up.
            return

        # The first and last options are skipped -- presumably the current
        # page and a non-page entry; TODO confirm against the live markup.
        for option in pager.findAll('option')[1:-1]:
            page_url = option.get('value')
            if not page_url:
                continue
            page_body = self.index_to_soup(page_url).find(attrs={'class': 'single-entry'})
            if page_body is None:
                # Inserting None would raise; skip pages lacking the container.
                continue
            appendtag.insert(len(appendtag.contents), page_body)

        for nav in appendtag.findAll(attrs={'class': 'post-nav-bottom-list'}):
            nav.extract()
        for comment in appendtag.findAll(text=lambda text: isinstance(text, Comment)):
            comment.extract()

    def preprocess_html(self, soup):
        """Merge any additional pages into the document body and return ``soup``."""
        body = soup.body
        if body is not None:
            self.append_page(soup, body)
        return soup