This commit is contained in:
Kovid Goyal 2018-10-18 08:33:40 +05:30
commit f32426a3d0
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
56 changed files with 17 additions and 446 deletions

40 binary files changed (deleted recipe icons, not shown; sizes ranged from 124 B to 1.3 KiB).
View File

@@ -1,19 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe


class NowyEkran(BasicNewsRecipe):
    title = u'Nowy ekran'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    __author__ = 'fenuks'
    description = u'Niezależny serwis społeczności blogerów'
    category = 'blog'
    language = 'pl'
    masthead_url = 'http://s.nowyekran.pl/gfx/ekran-big.gif'
    cover_url = 'http://s.nowyekran.pl/gfx/ekran-big.gif'
    remove_tags_before = dict(name='div', attrs={'class': 'post_detal'})
    remove_tags_after = dict(name='div', attrs={'class': 'post_footer'})
    remove_tags = [dict(name='span', attrs={'class': 'ico ico_comments'}), dict(
        name='div', attrs={'class': 'post_footer'}), dict(name='a', attrs={'class': 'getpdf'})]
    feeds = [(u'Najnowsze notki', u'http://www.nowyekran.pl/RSS/')]

View File

@@ -1,62 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe


class NTO(BasicNewsRecipe):
    title = u'Nowa Trybuna Opolska'
    __author__ = 'fenuks'
    description = u'Nowa Trybuna Opolska - portal regionalny województwa opolskiego.'
    category = 'newspaper'
    language = 'pl'
    encoding = 'iso-8859-2'
    extra_css = 'ul {list-style: none; padding:0; margin:0;}'
    INDEX = 'http://www.nto.pl'
    masthead_url = INDEX + '/images/top_logo.png'
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True
    ignore_duplicate_articles = {'title', 'url'}
    use_embedded_content = False

    feeds = [
        (u'Wszystkie', u'http://www.nto.pl/rss.xml'),
        (u'Region', u'http://www.nto.pl/region.xml'),
        (u'Brzeg', u'http://www.nto.pl/brzeg.xml'),
        (u'G\u0142ubczyce', u'http://www.nto.pl/glubczyce.xml'),
        (u'K\u0119dzierzyn-Ko\u017ale', u'http://www.nto.pl/kedzierzynkozle.xml'),
        (u'Kluczbork', u'http://www.nto.pl/kluczbork.xml'),
        (u'Krapkowice', u'http://www.nto.pl/krapkowice.xml'),
        (u'Namys\u0142\xf3w', u'http://www.nto.pl/namyslow.xml'),
        (u'Nysa', u'http://www.nto.pl/nysa.xml'),
        (u'Olesno', u'http://www.nto.pl/olesno.xml'),
        (u'Opole', u'http://www.nto.pl/opole.xml'),
        (u'Prudnik', u'http://www.nto.pl/prudnik.xml'),
        (u'Strzelce Opolskie', u'http://www.nto.pl/strzelceopolskie.xml'),
        (u'Sport', u'http://www.nto.pl/sport.xml'),
        (u'Polska i \u015bwiat', u'http://www.nto.pl/apps/pbcs.dll/section?Category=RSS&channel=KRAJSWIAT'),
        (u'Zdrowy styl', u'http://www.nto.pl/apps/pbcs.dll/section?Category=rss_zdrowystyl'),
        (u'Reporta\u017c', u'http://www.nto.pl/reportaz.xml'),
        (u'Studia', u'http://www.nto.pl/akademicka.xml')]

    keep_only_tags = [dict(id='article')]

    def get_cover_url(self):
        soup = self.index_to_soup(
            self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI')
        nexturl = self.INDEX + soup.find(id='covers').find('a')['href']
        soup = self.index_to_soup(nexturl)
        self.cover_url = self.INDEX + \
            soup.find(id='cover').find(name='img')['src']
        return getattr(self, 'cover_url', self.cover_url)

    def decode_feedportal_url(self, url):
        link = url.rpartition('l/0L0S')[2][:-12]
        replaces = (('0B', '.'), ('0C', '/'), ('0H', ','),
                    ('0D', '?'), ('0F', '='), ('0A', '0'), ('0I', '_'))
        for t in replaces:
            link = link.replace(*t)
        return 'http://' + link

    def print_version(self, url):
        return self.decode_feedportal_url(url) + '&Template=printpicart'
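
A worked example of the FeedsPortal decoding above, as a sketch: the escape pairs ('0B' for '.', '0C' for '/', '0A' for '0', and so on) are expanded after the 12-character '/story01.htm' filler is sliced off. The sample URL below is fabricated purely for illustration.

def decode_feedportal_url(url):
    # keep only the escaped target after 'l/0L0S'; drop the 12-char filler
    link = url.rpartition('l/0L0S')[2][:-12]
    for old, new in (('0B', '.'), ('0C', '/'), ('0H', ','),
                     ('0D', '?'), ('0F', '='), ('0A', '0'), ('0I', '_')):
        link = link.replace(old, new)  # expand each two-character escape
    return 'http://' + link

sample = ('http://nto.feedsportal.com/c/0/f/0/s/0/'
          'l/0L0Swww0Bnto0Bpl0Cartykul0C10A230Bhtml/story01.htm')
assert decode_feedportal_url(sample) == 'http://www.nto.pl/artykul/1023.html'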

View File

@@ -15,19 +15,16 @@ class OptyczneRecipe(BasicNewsRecipe):
     remove_empty_feeds = True
     no_stylesheets = True
     oldest_article = 7
-    max_articles_per_feed = 100000
+    max_articles_per_feed = 100
     recursions = 0
     no_stylesheets = True
     remove_javascript = True
-    keep_only_tags = []
-    keep_only_tags.append(dict(name='div', attrs={'class': 'news'}))
+    keep_only_tags = dict(name='div', attrs={'class': 'main-article-content'})
-    remove_tags = []
-    remove_tags.append(dict(name='div', attrs={'class': 'center'}))
-    remove_tags.append(dict(name='div', attrs={'class': 'news_foto'}))
-    remove_tags.append(dict(name='div', attrs={'align': 'right'}))
+    remove_tags = [dict(name='div', attrs={'class': ['banner', 'colored', 'content-panel']}),
+                   dict(name='a', attrs={'class': 'icon-link comments-link'})]
     extra_css = '''
     body {font-family: Arial,Helvetica,sans-serif;}
@@ -38,5 +35,5 @@ class OptyczneRecipe(BasicNewsRecipe):
     .fot{font-size: x-small; color: #666666;}
     '''
     feeds = [
-        ('Aktualnosci', 'http://www.optyczne.pl/rss.xml'),
+        (u'Aktualności', 'http://www.optyczne.pl/rss.xml'),
     ]

View File

@@ -27,16 +27,14 @@ class OSW_Recipe(BasicNewsRecipe):
     simultaneous_downloads = 5
     keep_only_tags = []
-    # this line should show the title of the article, but it doesn't work
-    keep_only_tags.append(dict(name='h1', attrs={'class': 'print-title'}))
-    keep_only_tags.append(dict(name='div', attrs={'class': 'print-submitted'}))
-    keep_only_tags.append(dict(name='div', attrs={'class': 'print-content'}))
+    keep_only_tags.append(dict(name='h2', attrs={'class': 'node-title'}))
+    keep_only_tags.append(dict(name='div', attrs={'class': 'content clearfix'}))
     remove_tags = []
     remove_tags.append(dict(name='table', attrs={'id': 'attachments'}))
     remove_tags.append(dict(name='div', attrs={'class': 'print-submitted'}))

-    feeds = [(u'OSW', u'http://www.osw.waw.pl/pl/rss.xml')]
+    feeds = [(u'OSW', u'https://www.osw.waw.pl/pl/rss.xml')]

     def print_version(self, url):
-        return url.replace('http://www.osw.waw.pl/pl/', 'http://www.osw.waw.pl/pl/print/')
+        return url.replace('https://www.osw.waw.pl/pl/', 'https://www.osw.waw.pl/pl/print/')

View File

@@ -1,36 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe


class OSWorld(BasicNewsRecipe):
    title = u'OSWorld.pl'
    __author__ = 'fenuks'
    description = u'OSWorld.pl to serwis internetowy, dzięki któremu poznasz czym naprawdę jest Open Source. Serwis poświęcony jest wolnemu oprogramowaniu jak linux mint, centos czy ubunty. Znajdziecie u nasz artykuły, unity oraz informacje o certyfikatach CACert. OSWorld to mały świat wielkich systemów!'  # noqa
    category = 'OS, IT, open source, Linux'
    language = 'pl'
    cover_url = 'http://osworld.pl/wp-content/uploads/osworld-kwadrat-128x111.png'
    extra_css = 'img.alignleft {float: left; margin-right: 5px;}'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    use_embedded_content = False
    keep_only_tags = [dict(id=['dzial', 'posts'])]
    remove_tags = [dict(attrs={'class': 'post-comments'})]
    remove_tags_after = dict(attrs={'class': 'entry clr'})
    feeds = [(u'Artyku\u0142y', u'http://osworld.pl/category/artykuly/feed/'),
             (u'Nowe wersje', u'http://osworld.pl/category/nowe-wersje/feed/')]

    def append_page(self, soup, appendtag):
        tag = appendtag.find(attrs={'id': 'paginacja'})
        if tag:
            for nexturl in tag.findAll('a'):
                soup2 = self.index_to_soup(nexturl['href'])
                pagetext = soup2.find(attrs={'class': 'entry clr'})
                pos = len(appendtag.contents)
                appendtag.insert(pos, pagetext)
        for r in appendtag.findAll(attrs={'id': 'paginacja'}):
            r.extract()

    def preprocess_html(self, soup):
        self.append_page(soup, soup.body)
        return soup
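
The append_page pattern above (it recurs in the Pikoboard recipe further down) stitches a paginated article into a single document: fetch every link in the 'paginacja' pagination block, append each page's 'entry clr' body, then strip the pagination widget itself. A minimal standalone sketch of the same idea, using requests and BeautifulSoup instead of calibre's index_to_soup; the selectors mirror the recipe, and any real site using them is an assumption:

import requests
from bs4 import BeautifulSoup


def stitch_pages(soup):
    pagination = soup.find(attrs={'id': 'paginacja'})
    if pagination:
        for nexturl in pagination.find_all('a'):
            page = BeautifulSoup(requests.get(nexturl['href']).text, 'html.parser')
            body = page.find(attrs={'class': 'entry clr'})
            if body is not None:
                soup.body.append(body)  # splice the next page's article body in
    # remove the pagination widget so it doesn't end up in the e-book
    for widget in soup.find_all(attrs={'id': 'paginacja'}):
        widget.extract()
    return soup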

View File

@@ -1,17 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe


class palmtop_pl(BasicNewsRecipe):
    title = u'Palmtop.pl'
    __author__ = 'fenuks'
    description = 'wortal technologii mobilnych'
    category = 'mobile'
    language = 'pl'
    cover_url = 'http://cdn.g-point.biz/wp-content/themes/palmtop-new/images/header_palmtop_logo.png'
    masthead_url = 'http://cdn.g-point.biz/wp-content/themes/palmtop-new/images/header_palmtop_logo.png'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = True
    # remove_tags_before = dict(name='h2')
    feeds = [(u'Newsy', u'http://palmtop.pl/feed/atom/')]

View File

@@ -1,37 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe


class PC_Arena(BasicNewsRecipe):
    title = u'PCArena'
    oldest_article = 7
    max_articles_per_feed = 100
    __author__ = 'fenuks'
    description = u'Najnowsze informacje z branży IT - testy, recenzje, aktualności, rankingi, wywiady. Twoje źródło informacji o sprzęcie komputerowym.'
    category = 'IT'
    language = 'pl'
    index = 'http://pcarena.pl'
    masthead_url = 'http://pcarena.pl/pcarena/img/logo.png'
    cover_url = 'http://pcarena.pl/pcarena/img/logo.png'
    no_stylesheets = True
    remove_empty_feeds = True
    feeds = [
        (u'Aktualności', u'http://pcarena.pl/aktualnosci/feeds.rss'),
        (u'Testy', u'http://pcarena.pl/testy/feeds.rss'),
        (u'Software', u'http://pcarena.pl/oprogramowanie/feeds.rss'),
        (u'Poradniki', u'http://pcarena.pl/poradniki/feeds.rss'),
        (u'Mobile', u'http://pcarena.pl/mobile/feeds.rss')]

    def print_version(self, url):
        return url.replace('show', 'print')

    def image_url_processor(self, baseurl, url):
        if 'http' not in url:
            return 'http://pcarena.pl' + url
        else:
            return url

    def preprocess_html(self, soup):
        for a in soup('a'):
            if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:  # noqa
                a['href'] = self.index + a['href']
        return soup
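
image_url_processor and preprocess_html above both absolutize relative URLs by plain string concatenation. A sketch of the same normalization done with urljoin instead (on the Python 2 these recipes targeted, the import would be from urlparse import urljoin); this is an alternative, not what the recipe shipped:

from urllib.parse import urljoin

base = 'http://pcarena.pl'
# relative paths get resolved against the site root
assert urljoin(base, '/img/logo.png') == 'http://pcarena.pl/img/logo.png'
# already-absolute URLs pass through unchanged
assert urljoin(base, 'http://example.com/x.png') == 'http://example.com/x.png'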

View File

@@ -1,30 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe


class PC_Centre(BasicNewsRecipe):
    title = u'PC Centre'
    oldest_article = 7
    max_articles_per_feed = 100
    __author__ = 'fenuks'
    description = u'Portal komputerowy, a w nim: testy sprzętu komputerowego, recenzje gier i oprogramowania, a także opisy produktów związanych z komputerami.'
    category = 'IT'
    language = 'pl'
    masthead_url = 'http://pccentre.pl/views/images/logo.gif'
    cover_url = 'http://pccentre.pl/views/images/logo.gif'
    no_stylesheets = True
    remove_empty_feeds = True
    ignore_duplicate_articles = {'title', 'url'}
    remove_tags = [dict(attrs={'class': 'logo_print'})]
    feeds = [
        (u'Aktualno\u015bci', u'http://pccentre.pl/backend.php'),
        (u'Publikacje', u'http://pccentre.pl/backend.php?mode=a'),
        (u'Sprz\u0119t komputerowy', u'http://pccentre.pl/backend.php?mode=n&section=2'),
        (u'Oprogramowanie', u'http://pccentre.pl/backend.php?mode=n&section=3'),
        (u'Gry komputerowe i konsole', u'http://pccentre.pl/backend.php?mode=n&section=4'),
        (u'Internet', u'http://pccentre.pl/backend.php?mode=n&section=7'),
        (u'Bezpiecze\u0144stwo', u'http://pccentre.pl/backend.php?mode=n&section=5'),
        (u'Multimedia', u'http://pccentre.pl/backend.php?mode=n&section=6'),
        (u'Biznes', u'http://pccentre.pl/backend.php?mode=n&section=9')]

    def print_version(self, url):
        return url.replace('show', 'print')

View File

@@ -75,9 +75,7 @@ class PCLab(BasicNewsRecipe):
             href = link.get('href', None)
             if href and href.startswith('/'):
                 link['href'] = 'http://pclab.pl' + href
-        # finally remove some tags
-        # for r in soup.findAll('div', attrs={'class':['tags', 'index',
-        # 'script_bxad_slot_display_list_bxad_slot', 'index first', 'zumi',
-        # 'navigation']})
+        for r in soup.findAll(name='a', href=re.compile(r'^https://www.skapiec.pl/')):
+            r.extract()
         return soup

View File

@@ -1,36 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe


class Pikoboard(BasicNewsRecipe):
    title = u'Pikoboard.pl'
    __author__ = 'fenuks'
    description = u'Portal poświęcony takim urządzeniom jak: Raspberry Pi, XBMC, ODROID-X, BeagleBoard czy CuBox. Systemy operacyjne, modyfikacje oraz obudowy i innego rodzaju dodatki.'  # noqa
    category = 'IT, open source, Linux, Raspberry Pi'
    language = 'pl'
    cover_url = 'http://picoboard.pl/wp-content/themes/portal/img/logo.jpg'
    extra_css = 'img.alignleft {float: left; margin-right: 5px;}'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    use_embedded_content = False
    keep_only_tags = [dict(id=['dzial', 'posts'])]
    remove_tags = [dict(attrs={'class': 'post-comments'})]
    remove_tags_after = dict(attrs={'class': 'entry clr'})
    feeds = [(u'Newsy', u'http://picoboard.pl/feed/atom/'),
             (u'Artyku\u0142y', u'http://picoboard.pl/category/artykuly/feed/')]

    def append_page(self, soup, appendtag):
        tag = appendtag.find(attrs={'id': 'paginacja'})
        if tag:
            for nexturl in tag.findAll('a'):
                soup2 = self.index_to_soup(nexturl['href'])
                pagetext = soup2.find(attrs={'class': 'entry clr'})
                pos = len(appendtag.contents)
                appendtag.insert(pos, pagetext)
        for r in appendtag.findAll(attrs={'id': 'paginacja'}):
            r.extract()

    def preprocess_html(self, soup):
        self.append_page(soup, soup.body)
        return soup

View File

@@ -1,42 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe


class PolskaTimes(BasicNewsRecipe):
    title = u'Polska Times'
    __author__ = 'fenuks'
    description = u'Internetowe wydanie dziennika ogólnopolskiego Polska The Times. Najświeższe informacje: wydarzenia w kraju i na świecie, reportaże, poradniki, opinie.'  # noqa
    category = 'newspaper'
    language = 'pl'
    masthead_url = 'http://s.polskatimes.pl/g/logo_naglowek/polska.gif?17'
    oldest_article = 7
    encoding = 'iso-8859-2'
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True
    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}
    remove_tags_after = dict(attrs={'src': 'http://nm.dz.com.pl/dz.png'})
    remove_tags = [dict(id='mat-podobne'), dict(name='a', attrs={
        'class': 'czytajDalej'}), dict(attrs={'src': 'http://nm.dz.com.pl/dz.png'})]
    feeds = [
        (u'Fakty', u'http://polskatimes.feedsportal.com/c/32980/f/533648/index.rss'),
        (u'Opinie', u'http://www.polskatimes.pl/rss/opinie.xml'),
        (u'Sport', u'http://polskatimes.feedsportal.com/c/32980/f/533649/index.rss'),
        (u'Pieni\u0105dze', u'http://polskatimes.feedsportal.com/c/32980/f/533657/index.rss'),
        (u'Twoje finanse', u'http://www.polskatimes.pl/rss/twojefinanse.xml'),
        (u'Kultura', u'http://polskatimes.feedsportal.com/c/32980/f/533650/index.rss'),
        (u'Dodatki', u'http://www.polskatimes.pl/rss/dodatki.xml')]

    def print_version(self, url):
        return url.replace('artykul', 'drukuj')

    def skip_ad_pages(self, soup):
        if 'Advertisement' in soup.title:
            nexturl = soup.find('a')['href']
            return self.index_to_soup(nexturl, raw=True)

    def get_cover_url(self):
        soup = self.index_to_soup(
            'http://www.prasa24.pl/gazeta/metropolia-warszawska/')
        self.cover_url = soup.find(id='pojemnik').img['src']
        return getattr(self, 'cover_url', self.cover_url)
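
skip_ad_pages above handles FeedsPortal's interstitial: if the fetched page's title contains 'Advertisement', the recipe follows the page's first link to reach the real article. Schematically, with a stand-in fetch callable (an assumption of this sketch, not calibre's API):

def skip_ad_page(soup, fetch):
    # soup is a parsed page; fetch stands in for calibre's index_to_soup
    title = soup.title.get_text() if soup.title else ''
    if 'Advertisement' in title:
        return fetch(soup.find('a')['href'])  # follow through to the article
    return soup  # not an ad interstitial; keep the page as-is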

View File

@@ -21,7 +21,8 @@ class Polter(BasicNewsRecipe):
     ignore_duplicate_articles = {'title', 'url'}
     keep_only_tags = [dict(attrs={'class': 'boxcontent'})]
-    remove_tags = [dict(id='komentarze')]
+    remove_tags = [dict(id='komentarze'),
+                   dict(name='div', attrs={'class': 'ostatnieArtykuly'})]
     remove_tags_after = dict(id='komentarze')

     feeds = [
@@ -36,8 +37,7 @@ class Polter(BasicNewsRecipe):
         (u'Gry planszowe', 'http://planszowki.polter.pl/wiesci,rss.html'),
         (u'Gry PC', 'http://gry.polter.pl/wiesci,rss.html'),
         (u'Gry konsolowe', 'http://konsole.polter.pl/wiesci,rss.html'),
-        (u'Konwenty', 'http://konwenty.polter.pl/wiesci,rss.html'),
-        (u'Blogi', 'http://polter.pl/blogi,rss.html')]
+        (u'Konwenty', 'http://konwenty.polter.pl/wiesci,rss.html')]

     def preprocess_html(self, soup):
         for s in soup.findAll(attrs={'style': re.compile('float: ?left')}):
@@ -65,3 +65,6 @@ class Polter(BasicNewsRecipe):
         for r in soup.findAll(name='a', href=re.compile(r'^http://www.ceneo.pl/')):
             r.extract()
         return soup
+
+    def preprocess_raw_html(self, raw_html, url):
+        return raw_html.replace('<br /><br /><h3>Czytaj również</h3>', '')

View File

@@ -1,63 +0,0 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from calibre.web.feeds.news import BasicNewsRecipe


class PoradniaPWN(BasicNewsRecipe):
    title = u'Poradnia Językowa PWN'
    __author__ = 'fenuks'
    description = u'Internetowa poradnia językowa Wydawnictwa Naukowego PWN. Poradnię prowadzi Redaktor Naczelny Słowników Języka Polskiego, prof. Mirosław Bańko. Pomagają mu eksperci - znani polscy językoznawcy. Współpracuje z nami m.in. prof. Jerzy Bralczyk oraz dr Jan Grzenia.'  # noqa
    category = 'language'
    language = 'pl'
    oldest_article = 14
    max_articles_per_feed = 100000
    INDEX = "http://poradnia.pwn.pl/"
    no_stylesheets = True
    remove_attributes = ['style']
    remove_javascript = True
    use_embedded_content = False
    keep_only_tags = [dict(name="div", attrs={"class": "searchhi"})]
    feeds = [(u'Poradnia', u'http://rss.pwn.pl/poradnia.rss')]

    '''def find_articles(self, url):
        articles = []
        soup = self.index_to_soup(url)
        counter = int(soup.find(name='p', attrs={'class': 'count'}).findAll('b')[-1].string)
        counter = 500
        pos = 0
        next = url
        while next:
            soup = self.index_to_soup(next)
            tag = soup.find(id="listapytan")
            art = tag.findAll(name='li')
            for i in art:
                if i.h4:
                    title = i.h4.a.string
                    url = self.INDEX + i.h4.a['href']
                    # date=soup.find(id='footer').ul.li.string[41:-1]
                    articles.append({'title': title,
                                     'url': url,
                                     'date': '',
                                     'description': ''})
            pos += 10
            if not pos >= counter:
                next = 'http://poradnia.pwn.pl/lista.php?kat=18&od=' + str(pos)
                print u'Tworzenie listy artykułów dla', next
            else:
                next = None
        print articles
        return articles

    def parse_index(self):
        feeds = []
        feeds.append((u"Poradnia", self.find_articles('http://poradnia.pwn.pl/lista.php')))
        return feeds'''

    def preprocess_html(self, soup):
        for i in soup.findAll(name=['ul', 'li']):
            i.name = "div"
        for z in soup.findAll(name='a'):
            if not z['href'].startswith('http'):
                z['href'] = 'http://poradnia.pwn.pl/' + z['href']
        return soup

View File

@@ -29,9 +29,3 @@ class ppeRecipe(BasicNewsRecipe):
         ('Recenzje', 'http://ppe.pl/rss-recenzje.html'),
         ('Publicystyka', 'http://ppe.pl/rss-publicystyka.html'),
     ]
-
-    def get_cover_url(self):
-        soup = self.index_to_soup('http://www.ppe.pl/psx_extreme.html')
-        part = soup.find(attrs={'class': 'archiwum-foto'})['style']
-        part = re.search("'(.+)'", part).group(1).replace('_min', '')
-        return 'http://www.ppe.pl' + part

View File

@@ -1,43 +0,0 @@
#!/usr/bin/env python2

__license__ = 'GPL v3'
__author__ = 'teepel <teepel44@gmail.com>'

'''
http://prawica.net
'''

from calibre.web.feeds.news import BasicNewsRecipe


class prawica_recipe(BasicNewsRecipe):
    title = u'prawica.net'
    __author__ = 'teepel <teepel44@gmail.com>'
    language = 'pl'
    description = 'Wiadomości ze strony prawica.net'
    INDEX = 'http://prawica.net/'
    remove_empty_feeds = True
    oldest_article = 1
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True

    feeds = [(u'all', u'http://prawica.net/all/feed')]

    keep_only_tags = []
    # this line should show the title of the article, but it doesn't work
    keep_only_tags.append(dict(name='h1', attrs={'class': 'print-title'}))
    keep_only_tags.append(dict(name='div', attrs={'class': 'content'}))

    remove_tags = []
    remove_tags.append(dict(name='div', attrs={
        'class': 'field field-type-viewfield field-field-autor2'}))
    remove_tags.append(dict(name='div', attrs={
        'class': 'field field-type-viewfield field-field-publikacje-autora'}))
    remove_tags.append(dict(name='div', attrs={
        'id': 'rate-widget-2 rate-widget clear-block rate-average rate-widget-fivestar rate-daa7512627f21dcf15e0af47e5279f0e rate-processed'}))

    remove_tags_after = [
        (dict(name='div', attrs={'class': 'field-label-inline-first'}))]

    def print_version(self, url):
        return url.replace('http://prawica.net/', 'http://prawica.net/print/')

View File

@@ -1,34 +0,0 @@
#!/usr/bin/env python2
'''
www.presseurop.eu/pl
'''

__license__ = 'GPL v3'
__author__ = 'teepel <teepel44@gmail.com>'

from calibre.web.feeds.news import BasicNewsRecipe
import re


class presseurop(BasicNewsRecipe):
    title = u'Presseurop'
    description = u'Najlepsze artykuły z prasy europejskiej'
    language = 'pl'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True
    remove_empty_feeds = True

    feeds = [
        (u'Polityka', u'http://www.presseurop.eu/pl/taxonomy/term/1/%2A/feed'),
        (u'Społeczeństwo', u'http://www.presseurop.eu/pl/taxonomy/term/2/%2A/feed'),
        (u'Gospodarka', u'http://www.presseurop.eu/pl/taxonomy/term/3/%2A/feed'),
        (u'Kultura i debaty', u'http://www.presseurop.eu/pl/taxonomy/term/4/%2A/feed'),
        (u'UE i Świat', u'http://www.presseurop.eu/pl/taxonomy/term/5/%2A/feed')
    ]

    preprocess_regexps = [
        (re.compile(r'\|.*</title>', re.DOTALL | re.IGNORECASE),
         lambda match: '</title>'),
    ]
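
The preprocess_regexps entry above trims the site suffix out of the page title: everything from the first '|' through the closing </title> is collapsed to just the closing tag. A quick illustration on made-up markup:

import re

pattern = re.compile(r'\|.*</title>', re.DOTALL | re.IGNORECASE)
html = '<title>Artykul | Presseurop</title><body>...</body>'
assert pattern.sub('</title>', html) == '<title>Artykul </title><body>...</body>'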