new recipes from kalibrator project
29
recipes/dzial_zagraniczny.recipe
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__author__ = 'teepel <teepel44@gmail.com>'
|
||||||
|
|
||||||
|
'''
|
||||||
|
dzialzagraniczny.pl
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
import re
|
||||||
|
|
||||||
|
class dzial_zagraniczny(BasicNewsRecipe):
    # Feed-driven recipe for dzialzagraniczny.pl. Everything here is plain
    # class-level configuration; no BasicNewsRecipe method is overridden.

    # --- identification ---
    title = u'Dział Zagraniczny'
    description = u'Polskiego czytelnika to nie interesuje'
    language = 'pl'
    __author__ = 'teepel <teepel44@gmail.com>'
    # Site root; not referenced anywhere else in this class.
    INDEX = 'http://dzialzagraniczny.pl'
    cover_url = 'https://fbcdn-profile-a.akamaihd.net/hprofile-ak-prn1/c145.5.160.160/559442_415653975115959_2126205128_n.jpg'

    # --- article source ---
    feeds = [
        (u'Dział zagraniczny', u'http://feeds.feedburner.com/dyndns/UOfz'),
    ]
    use_embedded_content = True

    # --- fetch limits ---
    oldest_article = 7
    max_articles_per_feed = 100
    simultaneous_downloads = 5
    remove_empty_feeds = True

    # --- markup clean-up and styling ---
    no_stylesheets = True
    remove_javascript = True
    extra_css = 'img {display: block;}'
|
28
recipes/equipped.recipe
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__author__ = 'teepel <teepel44@gmail.com>, Artur Stachecki <artur.stachecki@gmail.com>'
|
||||||
|
|
||||||
|
'''
|
||||||
|
equipped.pl
|
||||||
|
'''
|
||||||
|
|
||||||
|
class equipped(AutomaticNewsRecipe):
    # Feed-driven recipe for equipped.pl.
    # NOTE(review): AutomaticNewsRecipe is not imported in this file;
    # presumably calibre's recipe loader supplies it - confirm.

    # --- identification ---
    title = u'Equipped'
    description = u'Wiadomości z equipped.pl'
    language = 'pl'
    __author__ = 'teepel <teepel44@gmail.com>'
    # Site root; not referenced anywhere else in this class.
    INDEX = 'http://equipped.pl'

    # --- article source ---
    feeds = [
        (u'Equipped', u'http://feeds.feedburner.com/Equippedpl?format=xml'),
    ]
    use_embedded_content = False

    # --- fetch limits ---
    oldest_article = 7
    max_articles_per_feed = 100
    simultaneous_downloads = 5
    remove_empty_feeds = True

    # --- markup clean-up and styling ---
    no_stylesheets = True
    remove_javascript = True
    extra_css = '.alignleft {float:left; margin-right:5px;}'

    # Manual tag filters, currently disabled:
    #keep_only_tags = [dict(name='article')]
    #remove_tags = [dict(id='disqus_thread')]
    #remove_tags_after = [dict(id='disqus_thread')]
|
BIN
recipes/icons/dzial_zagraniczny.png
Normal file
After Width: | Height: | Size: 491 B |
BIN
recipes/icons/equipped.png
Normal file
After Width: | Height: | Size: 929 B |
BIN
recipes/icons/ittechblog.png
Normal file
After Width: | Height: | Size: 731 B |
BIN
recipes/icons/magazyn_consido.png
Normal file
After Width: | Height: | Size: 982 B |
BIN
recipes/icons/media2.png
Normal file
After Width: | Height: | Size: 660 B |
BIN
recipes/icons/mobilna.png
Normal file
After Width: | Height: | Size: 885 B |
BIN
recipes/icons/mojegotowanie.png
Normal file
After Width: | Height: | Size: 307 B |
BIN
recipes/icons/najwyzszy_czas.png
Normal file
After Width: | Height: | Size: 616 B |
BIN
recipes/icons/nowiny_rybnik.png
Normal file
After Width: | Height: | Size: 1.2 KiB |
BIN
recipes/icons/osw.png
Normal file
After Width: | Height: | Size: 489 B |
BIN
recipes/icons/ppe_pl.png
Normal file
After Width: | Height: | Size: 3.1 KiB |
BIN
recipes/icons/presseurop.png
Normal file
After Width: | Height: | Size: 207 B |
BIN
recipes/icons/res_publica.png
Normal file
After Width: | Height: | Size: 733 B |
BIN
recipes/icons/wolne_media.png
Normal file
After Width: | Height: | Size: 497 B |
27
recipes/ittechblog.recipe
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = 'MrStefan'
|
||||||
|
|
||||||
|
'''
|
||||||
|
www.ittechblog.pl
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
import re
|
||||||
|
|
||||||
|
class ittechblog(BasicNewsRecipe):
    # Feed-driven recipe for www.ittechblog.pl; configuration only, no
    # method overrides.

    # --- identification ---
    title = u'IT techblog'
    description = u'Na naszym blogu technologicznym znajdziesz między innymi: testy sprzętu, najnowsze startupy, technologiczne nowinki, felietony tematyczne.'
    language = 'pl'
    __author__ = 'MrStefan <mrstefaan@gmail.com>'

    # --- article source ---
    feeds = [
        (u'Artykuły', u'http://feeds.feedburner.com/ITTechBlog?format=xml'),
    ]
    use_embedded_content = False

    # --- fetch limits ---
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True

    # --- markup clean-up and styling ---
    no_stylesheets = True
    remove_javascript = True
    extra_css = '.cover > img {display:block;}'

    # Keep the element(s) with class "box", then drop <aside> elements and
    # the tag / counter / twitter-button classes.
    keep_only_tags = [
        dict(attrs={'class': 'box'}),
    ]
    remove_tags = [
        dict(name='aside'),
        dict(attrs={'class': ['tags', 'counter', 'twitter-share-button']}),
    ]
|
88
recipes/magazyn_consido.recipe
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
|
||||||
|
'''
|
||||||
|
magazynconsido.pl/
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.utils.magick import Image
|
||||||
|
|
||||||
|
class magazynconsido(BasicNewsRecipe):
    # Recipe for magazynconsido.pl. It builds its own section index from the
    # site's FeedBurner feed (grouping items by <category>), appends the
    # paragraphs of the next page of a paginated article, and rewrites every
    # downloaded image as grayscale.

    title = u'Magazyn Consido'
    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com> ,teepel <teepel44@gmail.com>'
    language = 'pl'
    description = u'Portal dla architektów i projektantów'
    masthead_url = 'http://qualitypixels.pl/wp-content/themes/airlock/advance/inc/timthumb.php?src=http://qualitypixels.pl/wp-content/uploads/2012/01/logotyp-magazynconsido-11.png&w=455&zc=1'
    oldest_article = 7
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False

    keep_only_tags = [
        dict(name='h1'),
        dict(name='p'),
        dict(attrs={'class': 'navigation'}),
    ]
    remove_tags = [dict(attrs={'style': 'font-size: x-small;'})]

    remove_tags_after = [dict(attrs={'class': 'navigation'})]

    extra_css = ''' img {max-width:30%; max-height:30%; display: block; margin-left: auto; margin-right: auto;}
                    h1 {text-align: center;}'''

    # CSS classes of the <div> blocks that append_page() strips out.
    _UNWANTED_DIV_CLASSES = ['height: 35px;', 'post-meta', 'addthis_toolbox addthis_default_style addthis_', 'post-meta-bottom', 'block_recently_post', 'fbcomments', 'pin-it-button', 'pages', 'navigation']

    def parse_index(self):
        """Build the section index from the FeedBurner feed.

        Items are grouped by the text of their <category> element, with
        sections kept in first-seen order. The 'Video' section is left out
        (the original code appended it and immediately popped it again).

        Returns a list of ``(section_title, [article_dict, ...])`` tuples
        where each article dict has the keys 'title', 'url', 'date' and
        'description'.
        """
        soup = self.index_to_soup('http://feeds.feedburner.com/magazynconsido?format=xml')
        sections = []  # section names in order of first appearance
        articles = {}  # section name -> list of article dicts

        for item in soup.findAll('item'):
            section = self.tag_to_string(item.category)
            # `in` replaces dict.has_key(), which only exists on Python 2.
            if section not in articles:
                sections.append(section)
                articles[section] = []
            articles[section].append({
                'title': self.tag_to_string(item.title),
                'url': self.tag_to_string(item.guid),
                'date': self.tag_to_string(item.pubDate),
                'description': self.tag_to_string(item.description),
            })

        return [(section, articles[section])
                for section in sections if section != 'Video']

    def append_page(self, soup, appendtag):
        """Append the next page's paragraphs to *appendtag*, then strip clutter.

        If *soup* contains a 'wp-pagenavi' pager with a 'nextpostslink'
        anchor, that page is fetched once and every <p> found in it is moved
        to the end of *appendtag*. Afterwards all <div> elements whose class
        is listed in ``_UNWANTED_DIV_CLASSES`` are removed from *appendtag*.
        """
        pager = soup.find('div', attrs={'class': 'wp-pagenavi'})
        if pager is not None:
            nexturl = soup.find('a', attrs={'class': 'nextpostslink'})
            # A pager without a "next" link (e.g. on the last page) used to
            # crash with a TypeError on nexturl['href'].
            if nexturl is not None:
                soup2 = self.index_to_soup(nexturl['href'])
                for tag in soup2.findAll('p'):
                    appendtag.insert(len(appendtag.contents), tag)

        # NOTE(review): indentation was lost in this view; the clean-up loop
        # is assumed to run unconditionally (function level) - confirm.
        unwanted = {'class': self._UNWANTED_DIV_CLASSES}
        junk = appendtag.find('div', attrs=unwanted)
        while junk is not None:
            junk.replaceWith('')
            junk = appendtag.find('div', attrs=unwanted)

    def preprocess_html(self, soup):
        """Merge the follow-up page into the body, then run adeify_images."""
        self.append_page(soup, soup.body)
        return self.adeify_images(soup)

    def postprocess_html(self, soup, first):
        """Convert every <img> that has a ``src`` to grayscale, in place.

        Each image is opened from the path in ``src``, switched to
        "GrayscaleType" and saved back to the same path. Returns *soup*.
        """
        # process all the images
        for tag in soup.findAll(lambda tag: tag.name.lower() == 'img' and tag.has_key('src')):
            iurl = tag['src']
            img = Image()
            img.open(iurl)
            # NOTE(review): compares an Image object with an int; kept
            # unchanged from the original - confirm it can ever be true.
            if img < 0:
                raise RuntimeError('Out of memory')
            img.type = "GrayscaleType"
            img.save(iurl)
        return soup
|
37
recipes/media2.recipe
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = 'teepel'
|
||||||
|
|
||||||
|
'''
|
||||||
|
media2.pl
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
import re
|
||||||
|
|
||||||
|
class media2_pl(BasicNewsRecipe):
    # Feed-driven recipe for media2.pl; configuration only, no method
    # overrides.

    # --- identification ---
    title = u'Media2'
    description = u'Media2.pl to jeden z najczęściej odwiedzanych serwisów dla profesjonalistów z branży medialnej, telekomunikacyjnej, public relations oraz nowych technologii.'
    language = 'pl'
    __author__ = 'teepel <teepel44@gmail.com>'
    masthead_url = 'http://media2.pl/res/logo/www.png'

    # --- article source ---
    feeds = [
        (u'Media2', u'http://feeds.feedburner.com/media2'),
    ]

    # --- fetch limits ---
    oldest_article = 1
    max_articles_per_feed = 100
    simultaneous_downloads = 5
    remove_empty_feeds = True

    # --- markup clean-up and styling ---
    no_stylesheets = True
    remove_javascript = True
    extra_css = '''.news-lead{font-weight: bold; }'''

    # Keep the main news container, then strip the comment counter, the
    # sidebar and the tag list.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'news-item tpl-big'}),
    ]
    remove_tags = [
        dict(name='span', attrs={'class': 'news-comments'}),
        dict(name='div', attrs={'class': 'item-sidebar'}),
        dict(name='div', attrs={'class': 'news-tags'}),
    ]
|
27
recipes/mobilna.recipe
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = 'MrStefan'
|
||||||
|
|
||||||
|
'''
|
||||||
|
www.mobilna.pl
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
import re
|
||||||
|
|
||||||
|
class mobilna(BasicNewsRecipe):
    # Feed-driven recipe for www.mobilna.pl; configuration only, no method
    # overrides.

    # --- identification ---
    title = u'Mobilna.pl'
    description = u'twoja mobilna strona'
    language = 'pl'
    __author__ = 'MrStefan <mrstefaan@gmail.com>'
    #masthead_url=''

    # --- article source ---
    feeds = [
        (u'Artykuły', u'http://mobilna.pl/feed/'),
    ]
    use_embedded_content = True

    # --- fetch limits ---
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True

    # --- markup clean-up ---
    no_stylesheets = True
    remove_javascript = True
    # Manual tag filter, currently disabled:
    #keep_only_tags =[dict(attrs={'class':'Post'})]
|
51
recipes/mojegotowanie.recipe
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = 'MrStefan, teepel'
|
||||||
|
|
||||||
|
'''
|
||||||
|
www.mojegotowanie.pl
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
import re
|
||||||
|
|
||||||
|
class mojegotowanie(BasicNewsRecipe):
    # Recipe for www.mojegotowanie.pl. It reads two RSS feeds, drops entries
    # whose title contains 'film', accepts only feed links that carry the
    # encoded print-layout marker, and decodes those links into direct
    # print-version URLs.

    title = u'Moje Gotowanie'
    __author__ = 'MrStefan <mrstefaan@gmail.com>, teepel <teepel44@gmail.com>'
    language = 'pl'
    description = u'Gotowanie to Twoja pasja? Uwielbiasz sałatki? Lubisz grillować? Przepisy kulinarne doskonałe na wszystkie okazje znajdziesz na www.mojegotowanie.pl.'
    masthead_url = 'http://www.mojegotowanie.pl/extension/selfstart/design/self/images/top_c2.gif'
    cover_url = 'http://www.mojegotowanie.pl/extension/selfstart/design/self/images/mgpl/mojegotowanie.gif'
    remove_empty_feeds = True
    oldest_article = 7
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True

    keep_only_tags = [
        dict(name='div', attrs={'class': 'content'}),
    ]

    feeds = [
        (u'Artykuły', u'http://mojegotowanie.pl/rss/feed/artykuly'),
        (u'Przepisy', u'http://mojegotowanie.pl/rss/feed/przepisy'),
    ]

    def parse_feeds(self):
        """Return the parsed feeds with every 'film' article removed.

        Delegates to ``BasicNewsRecipe.parse_feeds`` and then filters each
        feed's ``articles`` list in place, dropping any article whose title
        contains the substring 'film'.
        """
        feeds = BasicNewsRecipe.parse_feeds(self)
        for feed in feeds:
            # Slice assignment keeps the same list object, as the original
            # remove()-based loop did.
            feed.articles[:] = [
                article for article in feed.articles
                if 'film' not in article.title
            ]
        return feeds

    def get_article_url(self, article):
        """Return the entry's link only if it points at the print layout.

        Returns None for entries without a link, or whose link lacks the
        encoded '/layout/set/print' marker.
        """
        link = article.get('link')
        # Entries with no <link> used to raise TypeError on the `in` test.
        if link and 'Clayout0Cset0Cprint0' in link:
            return link
        return None

    def print_version(self, url):
        """Decode the encoded feed URL into the site's print-view URL.

        The second-to-last path segment of *url* holds the target path in an
        escaped form: the host/print prefix is collapsed to '/', '0I' becomes
        '_' and '0C' becomes '/'. The result is appended to the fixed
        print-layout base URL.
        """
        segment = url.split('/')
        URLPart = segment[-2]
        URLPart = URLPart.replace('0L0Smojegotowanie0Bpl0Clayout0Cset0Cprint0C', '/')
        URLPart = URLPart.replace('0I', '_')
        URLPart = URLPart.replace('0C', '/')
        return 'http://www.mojegotowanie.pl/layout/set/print' + URLPart
|
28
recipes/najwyzszy_czas.recipe
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__author__ = 'teepel <teepel44@gmail.com>'
|
||||||
|
|
||||||
|
'''
|
||||||
|
nczas.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
import re
|
||||||
|
|
||||||
|
class nczas(BasicNewsRecipe):
    # Feed-driven recipe for nczas.com; configuration only, no method
    # overrides.

    # --- identification ---
    title = u'Najwy\u017cszy Czas'
    description = 'Wiadomości z nczas.com'
    language = 'pl'
    __author__ = 'teepel <teepel44@gmail.com>'
    # Site root; not referenced anywhere else in this class.
    INDEX = 'http://nczas.com'

    # --- article source ---
    feeds = [
        (u'Najwyższy Czas', u'http://nczas.com/feed/'),
    ]
    use_embedded_content = True

    # --- fetch limits ---
    oldest_article = 7
    max_articles_per_feed = 100
    simultaneous_downloads = 5
    remove_empty_feeds = True

    # --- markup clean-up ---
    no_stylesheets = True
    remove_javascript = True
    remove_attributes = ['style']
|
33
recipes/nowiny_rybnik.recipe
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
class NowinyRybnik(BasicNewsRecipe):
    # Feed-driven recipe for www.nowiny.rybnik.pl. Besides the static
    # configuration it flattens simple hyperlinks into plain text.

    # --- identification ---
    title = u'Nowiny - Rybnik'
    description = u'Tygodnik Regionalny NOWINY. Ogłoszenia drobne, wiadomości i wydarzenia z regionu Rybnika i okolic'
    language = 'pl'
    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
    masthead_url = 'http://www.nowiny.rybnik.pl/logo/logo.jpg'

    # --- article source ---
    feeds = [
        (u'Wszystkie artykuły', u'http://www.nowiny.rybnik.pl/rss,artykuly,dzial,0,miasto,0,ile,25.xml'),
    ]

    # --- fetch limits ---
    oldest_article = 7
    max_articles_per_feed = 100
    simultaneous_downloads = 5

    # --- markup clean-up ---
    no_stylesheets = True
    remove_javascript = True
    keep_only_tags = [dict(name='div', attrs={'id': 'drukuj'})]
    remove_tags = [dict(name='div', attrs={'id': 'footer'})]

    def preprocess_html(self, soup):
        """Replace every <a> that has a single string child with that string.

        Anchors whose ``.string`` is None are left untouched. Returns the
        same *soup* object.
        """
        for anchor in soup.findAll('a'):
            text = anchor.string
            if text is None:
                continue
            anchor.replaceWith(text)
        return soup
|
42
recipes/osw.recipe
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__author__ = 'teepel <teepel44@gmail.com>'
|
||||||
|
|
||||||
|
'''
|
||||||
|
http://www.osw.waw.pl - Osrodek studiow wschodnich
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
import re
|
||||||
|
|
||||||
|
class OSW_Recipe(BasicNewsRecipe):
    # Feed-driven recipe for www.osw.waw.pl. Articles are fetched through
    # the site's /pl/print/ URLs (see print_version below).

    # --- identification ---
    title = u'Ośrodek Studiów Wschodnich'
    description = u'Ośrodek Studiów Wschodnich im. Marka Karpia. Centre for Eastern Studies.'
    category = u'News'
    language = 'pl'
    __author__ = 'teepel <teepel44@gmail.com>'
    # Site root; not referenced anywhere else in this class.
    INDEX = 'http://www.osw.waw.pl'
    cover_url = ''

    # --- article source ---
    feeds = [
        (u'OSW', u'http://www.osw.waw.pl/pl/rss.xml'),
    ]

    # --- fetch limits ---
    oldest_article = 7
    max_articles_per_feed = 100
    simultaneous_downloads = 5
    remove_empty_feeds = True

    # --- markup clean-up ---
    no_stylesheets = True
    remove_javascript = True

    keep_only_tags = [
        # Meant to keep the article title, but per the original author's
        # note it does not work.
        dict(name='h1', attrs={'class': 'print-title'}),
        dict(name='div', attrs={'class': 'print-submitted'}),
        dict(name='div', attrs={'class': 'print-content'}),
    ]
    # Note that 'print-submitted' is listed both above and here.
    remove_tags = [
        dict(name='table', attrs={'id': 'attachments'}),
        dict(name='div', attrs={'class': 'print-submitted'}),
    ]

    def print_version(self, url):
        """Return *url* with the '/pl/' prefix swapped for '/pl/print/'."""
        regular_prefix = 'http://www.osw.waw.pl/pl/'
        print_prefix = 'http://www.osw.waw.pl/pl/print/'
        return url.replace(regular_prefix, print_prefix)
|
41
recipes/ppe_pl.recipe
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class ppeRecipe(BasicNewsRecipe):
    # Feed-driven recipe for ppe.pl; configuration only, no method overrides.

    __license__ = 'GPL v3'
    __author__ = u'Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
    version = 1

    title = u'ppe.pl'
    category = u'News'
    description = u'Portal o konsolach i grach wideo.'
    cover_url = ''
    remove_empty_feeds = True
    # Previously assigned twice with the same value; one assignment kept.
    no_stylesheets = True
    oldest_article = 1
    max_articles_per_feed = 100000
    recursions = 0
    remove_javascript = True
    simultaneous_downloads = 2

    keep_only_tags = [
        dict(name='div', attrs={'class': 'news-heading'}),
        dict(name='div', attrs={'class': 'tresc-poziom'}),
    ]

    remove_tags = [
        dict(name='div', attrs={'class': 'bateria1'}),
        dict(name='div', attrs={'class': 'bateria2'}),
        dict(name='div', attrs={'class': 'bateria3'}),
        dict(name='div', attrs={'class': 'news-photo'}),
        dict(name='div', attrs={'class': 'fbl'}),
        dict(name='div', attrs={'class': 'info'}),
        dict(name='div', attrs={'class': 'links'}),
        dict(name='div', attrs={'style': 'padding: 4px'}),
    ]

    feeds = [
        # 'feed://' is a browser pseudo-scheme, not a real transport; the
        # same resource is addressed over plain HTTP.
        ('Newsy', 'http://ppe.pl/rss/rss.xml'),
    ]
|
32
recipes/presseurop.recipe
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
'''
|
||||||
|
www.presseurop.eu/pl
|
||||||
|
'''
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__author__ = 'teepel <teepel44@gmail.com>'
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
import re
|
||||||
|
|
||||||
|
class presseurop(BasicNewsRecipe):
    # Feed-driven recipe for the Polish edition of presseurop.eu
    # (www.presseurop.eu/pl); configuration only, no method overrides.

    title = u'Presseurop'
    # Added for consistency with the other recipes in this commit: the
    # module-level __author__ is not a class attribute, and every feed below
    # is the /pl/ (Polish) edition.
    __author__ = 'teepel <teepel44@gmail.com>'
    language = 'pl'
    description = u'Najlepsze artykuły z prasy europejskiej'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True

    feeds = [
        (u'Polityka', u'http://www.presseurop.eu/pl/taxonomy/term/1/%2A/feed'),
        (u'Społeczeństwo', u'http://www.presseurop.eu/pl/taxonomy/term/2/%2A/feed'),
        (u'Gospodarka', u'http://www.presseurop.eu/pl/taxonomy/term/3/%2A/feed'),
        (u'Kultura i debaty', u'http://www.presseurop.eu/pl/taxonomy/term/4/%2A/feed'),
        (u'UE i Świat', u'http://www.presseurop.eu/pl/taxonomy/term/5/%2A/feed'),
    ]

    preprocess_regexps = [
        # Cut the title at its first '|'. The previous pattern
        # r'\|.*</title>' was greedy under DOTALL, so it could match from the
        # first '|' anywhere in the page up to the last '</title>'. [^<]*
        # keeps the match inside the title text.
        (re.compile(r'\|[^<]*</title>', re.IGNORECASE),
         lambda match: '</title>'),
    ]
|
34
recipes/res_publica.recipe
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.utils.magick import Image
|
||||||
|
|
||||||
|
class ResPublicaNowaRecipe(BasicNewsRecipe):
    # Feed-driven recipe for publica.pl (Res Publica Nowa). Besides the
    # static configuration it flattens simple hyperlinks into plain text.

    __license__ = 'GPL v3'
    __author__ = u'intromatyk <intromatyk@gmail.com>'
    language = 'pl'
    version = 1

    title = u'Res Publica Nowa'
    category = u'News'
    description = u'Portal kulturalno-społecznego kwartalnika o profilu liberalnym, wydawany przez Fundację Res Publica'
    cover_url = ''
    remove_empty_feeds = True
    # Previously assigned twice with the same value; one assignment kept.
    no_stylesheets = True
    oldest_article = 7
    max_articles_per_feed = 100000
    recursions = 0
    remove_javascript = True
    simultaneous_downloads = 5

    feeds = [
        # 'feed://' is a browser pseudo-scheme, not a real transport; the
        # same resource is addressed over plain HTTP.
        ('Artykuly', 'http://publica.pl/feed'),
    ]

    def preprocess_html(self, soup):
        """Replace every <a> that has a single string child with that string.

        Anchors whose ``.string`` is None are left untouched. Returns the
        same *soup* object.
        """
        for alink in soup.findAll('a'):
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
        return soup
|
27
recipes/wolne_media.recipe
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__author__ = 'teepel <teepel44@gmail.com>'
|
||||||
|
|
||||||
|
'''
|
||||||
|
wolnemedia.net
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
import re
|
||||||
|
|
||||||
|
class wolne_media(AutomaticNewsRecipe):
    # Feed-driven recipe for wolnemedia.net, one feed per site category.
    # NOTE(review): AutomaticNewsRecipe is not imported in this file (only
    # BasicNewsRecipe is); presumably calibre's recipe loader supplies it -
    # confirm.

    # --- identification ---
    title = u'Wolne Media'
    description = 'Wiadomości z wolnemedia.net'
    language = 'pl'
    __author__ = 'teepel <teepel44@gmail.com>'
    # Site root; not referenced anywhere else in this class.
    INDEX = 'http://wolnemedia.net'

    # --- fetch limits ---
    oldest_article = 1
    max_articles_per_feed = 100
    simultaneous_downloads = 5
    remove_empty_feeds = True

    # --- markup clean-up ---
    no_stylesheets = True
    remove_javascript = True
    auto_cleanup = True

    # --- article sources: (section title, feed URL) ---
    feeds = [
        (u'Wiadomości z kraju', u'http://wolnemedia.net/category/wiadomosci-z-kraju/feed/'),
        (u'Wiadomości ze świata', u'http://wolnemedia.net/category/wiadomosci-ze-swiata/feed/'),
        (u'Edukacja', u'http://wolnemedia.net/category/edukacja/feed/'),
        (u'Ekologia', u'http://wolnemedia.net/category/ekologia/feed/'),
        (u'Gospodarka', u'http://wolnemedia.net/category/gospodarka/feed/'),
        (u'Historia', u'http://wolnemedia.net/category/historia/feed/'),
        (u'Kultura', u'http://wolnemedia.net/category/kultura/feed/'),
        (u'Kulturoznawstwo', u'http://wolnemedia.net/category/kulturoznawstwo/feed/'),
        (u'Media', u'http://wolnemedia.net/category/media/feed/'),
        (u'Nauka', u'http://wolnemedia.net/category/nauka/feed/'),
        (u'Opowiadania', u'http://wolnemedia.net/category/opowiadania/feed/'),
        (u'Paranauka i ezoteryka', u'http://wolnemedia.net/category/ezoteryka/feed/'),
        (u'Polityka', u'http://wolnemedia.net/category/polityka/feed/'),
        (u'Prawo', u'http://wolnemedia.net/category/prawo/feed/'),
        (u'Publicystyka', u'http://wolnemedia.net/category/publicystyka/feed/'),
        (u'Reportaż', u'http://wolnemedia.net/category/reportaz/feed/'),
        (u'Seks', u'http://wolnemedia.net/category/seks/feed/'),
        (u'Społeczeństwo', u'http://wolnemedia.net/category/spoleczenstwo/feed/'),
        (u'Świat komputerów', u'http://wolnemedia.net/category/swiat-komputerow/feed/'),
        (u'Wierzenia', u'http://wolnemedia.net/category/wierzenia/feed/'),
        (u'Zdrowie', u'http://wolnemedia.net/category/zdrowie/feed/'),
    ]
|