Merge branch 'master' of https://github.com/t3d/calibre
Before Width: | Height: | Size: 583 B |
Before Width: | Height: | Size: 160 B |
Before Width: | Height: | Size: 432 B |
Before Width: | Height: | Size: 345 B |
Before Width: | Height: | Size: 345 B |
Before Width: | Height: | Size: 680 B |
Before Width: | Height: | Size: 124 B |
Before Width: | Height: | Size: 240 B |
Before Width: | Height: | Size: 929 B |
Before Width: | Height: | Size: 440 B |
Before Width: | Height: | Size: 1.3 KiB |
Before Width: | Height: | Size: 263 B |
Before Width: | Height: | Size: 263 B |
Before Width: | Height: | Size: 1.1 KiB |
Before Width: | Height: | Size: 161 B |
Before Width: | Height: | Size: 357 B |
Before Width: | Height: | Size: 670 B |
Before Width: | Height: | Size: 488 B |
Before Width: | Height: | Size: 725 B |
Before Width: | Height: | Size: 184 B |
Before Width: | Height: | Size: 353 B |
Before Width: | Height: | Size: 638 B |
Before Width: | Height: | Size: 316 B |
Before Width: | Height: | Size: 258 B |
Before Width: | Height: | Size: 226 B |
Before Width: | Height: | Size: 466 B |
Before Width: | Height: | Size: 234 B |
Before Width: | Height: | Size: 581 B |
Before Width: | Height: | Size: 445 B |
Before Width: | Height: | Size: 293 B |
Before Width: | Height: | Size: 190 B |
Before Width: | Height: | Size: 145 B |
Before Width: | Height: | Size: 425 B |
Before Width: | Height: | Size: 368 B |
Before Width: | Height: | Size: 182 B |
Before Width: | Height: | Size: 644 B |
Before Width: | Height: | Size: 590 B |
Before Width: | Height: | Size: 539 B |
Before Width: | Height: | Size: 700 B |
Before Width: | Height: | Size: 144 B |
@ -1,19 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class NowyEkran(BasicNewsRecipe):
    """Recipe downloading the newest posts from the nowyekran.pl blog feed."""

    # --- publication metadata ---
    title = u'Nowy ekran'
    __author__ = 'fenuks'
    description = u'Niezależny serwis społeczności blogerów'
    category = 'blog'
    language = 'pl'

    # The same image is used for the masthead and the cover.
    masthead_url = 'http://s.nowyekran.pl/gfx/ekran-big.gif'
    cover_url = masthead_url

    # --- download limits ---
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True

    # --- page clean-up: keep the span between post_detal and post_footer ---
    remove_tags_before = dict(name='div', attrs={'class': 'post_detal'})
    remove_tags_after = dict(name='div', attrs={'class': 'post_footer'})
    remove_tags = [
        dict(name='span', attrs={'class': 'ico ico_comments'}),
        dict(name='div', attrs={'class': 'post_footer'}),
        dict(name='a', attrs={'class': 'getpdf'}),
    ]

    feeds = [(u'Najnowsze notki', u'http://www.nowyekran.pl/RSS/')]
|
@ -1,62 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class NTO(BasicNewsRecipe):
    """Recipe for Nowa Trybuna Opolska (nto.pl) built from its RSS feeds."""

    title = u'Nowa Trybuna Opolska'
    __author__ = 'fenuks'
    description = u'Nowa Trybuna Opolska - portal regionalny województwa opolskiego.'
    category = 'newspaper'
    language = 'pl'
    encoding = 'iso-8859-2'
    extra_css = 'ul {list-style: none; padding:0; margin:0;}'
    INDEX = 'http://www.nto.pl'
    masthead_url = INDEX + '/images/top_logo.png'
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True
    ignore_duplicate_articles = {'title', 'url'}
    use_embedded_content = False

    feeds = [
        (u'Wszystkie', u'http://www.nto.pl/rss.xml'),
        (u'Region', u'http://www.nto.pl/region.xml'),
        (u'Brzeg', u'http://www.nto.pl/brzeg.xml'),
        (u'G\u0142ubczyce', u'http://www.nto.pl/glubczyce.xml'),
        (u'K\u0119dzierzyn-Ko\u017ale', u'http://www.nto.pl/kedzierzynkozle.xml'),
        (u'Kluczbork', u'http://www.nto.pl/kluczbork.xml'),
        (u'Krapkowice', u'http://www.nto.pl/krapkowice.xml'),
        (u'Namys\u0142\xf3w', u'http://www.nto.pl/namyslow.xml'),
        (u'Nysa', u'http://www.nto.pl/nysa.xml'),
        (u'Olesno', u'http://www.nto.pl/olesno.xml'),

        (u'Opole', u'http://www.nto.pl/opole.xml'),
        (u'Prudnik', u'http://www.nto.pl/prudnik.xml'),
        (u'Strzelce Opolskie', u'http://www.nto.pl/strzelceopolskie.xml'),
        (u'Sport', u'http://www.nto.pl/sport.xml'),
        (u'Polska i \u015bwiat', u'http://www.nto.pl/apps/pbcs.dll/section?Category=RSS&channel=KRAJSWIAT'),
        (u'Zdrowy styl', u'http://www.nto.pl/apps/pbcs.dll/section?Category=rss_zdrowystyl'),
        (u'Reporta\u017c', u'http://www.nto.pl/reportaz.xml'),
        (u'Studia', u'http://www.nto.pl/akademicka.xml')]

    keep_only_tags = [dict(id='article')]

    def get_cover_url(self):
        """Return the URL of the current front-page image.

        Two pages are fetched: the front-page listing, then the page linked
        from its ``covers`` element, whose ``cover`` element holds the image.
        When any of the expected elements is missing, the previously known
        ``cover_url`` (or ``None``) is returned instead of raising.
        """
        fallback = getattr(self, 'cover_url', None)

        listing = self.index_to_soup(
            self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI')
        covers = listing.find(id='covers')
        link = covers.find('a') if covers is not None else None
        href = link.get('href') if link is not None else None
        if not href:
            return fallback

        detail = self.index_to_soup(self.INDEX + href)
        cover = detail.find(id='cover')
        img = cover.find(name='img') if cover is not None else None
        src = img.get('src') if img is not None else None
        if not src:
            return fallback

        self.cover_url = self.INDEX + src
        return self.cover_url

    def decode_feedportal_url(self, url):
        """Decode the article address embedded in a feed-proxy URL.

        Everything after the last ``l/0L0S`` marker is taken, its final 12
        characters are dropped, and the two-character escape codes are
        replaced in the order listed below. The result is prefixed with
        ``http://``.
        """
        link = url.rpartition('l/0L0S')[2][:-12]
        # Order matters: '0A' -> '0' runs after the codes that start with '0'.
        replaces = (('0B', '.'), ('0C', '/'), ('0H', ','),
                    ('0D', '?'), ('0F', '='), ('0A', '0'), ('0I', '_'))
        for t in replaces:
            link = link.replace(*t)
        return 'http://' + link

    def print_version(self, url):
        """Return the decoded article URL with the print template appended."""
        return self.decode_feedportal_url(url) + '&Template=printpicart'
|
@ -15,19 +15,16 @@ class OptyczneRecipe(BasicNewsRecipe):
|
||||
remove_empty_feeds = True
|
||||
no_stylesheets = True
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100000
|
||||
max_articles_per_feed = 100
|
||||
recursions = 0
|
||||
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
|
||||
keep_only_tags = []
|
||||
keep_only_tags.append(dict(name='div', attrs={'class': 'news'}))
|
||||
keep_only_tags = dict(name='div', attrs={'class':'main-article-content'})
|
||||
|
||||
remove_tags = []
|
||||
remove_tags.append(dict(name='div', attrs={'class': 'center'}))
|
||||
remove_tags.append(dict(name='div', attrs={'class': 'news_foto'}))
|
||||
remove_tags.append(dict(name='div', attrs={'align': 'right'}))
|
||||
remove_tags = [dict(name='div', attrs={'class':['banner','colored','content-panel']}),
|
||||
dict(name='a', attrs={'class':'icon-link comments-link'})]
|
||||
|
||||
extra_css = '''
|
||||
body {font-family: Arial,Helvetica,sans-serif;}
|
||||
@ -38,5 +35,5 @@ class OptyczneRecipe(BasicNewsRecipe):
|
||||
.fot{font-size: x-small; color: #666666;}
|
||||
'''
|
||||
feeds = [
|
||||
('Aktualnosci', 'http://www.optyczne.pl/rss.xml'),
|
||||
(u'Aktualności', 'http://www.optyczne.pl/rss.xml'),
|
||||
]
|
||||
|
@ -27,16 +27,14 @@ class OSW_Recipe(BasicNewsRecipe):
|
||||
simultaneous_downloads = 5
|
||||
|
||||
keep_only_tags = []
|
||||
# this line should show title of the article, but it doesnt work
|
||||
keep_only_tags.append(dict(name='h1', attrs={'class': 'print-title'}))
|
||||
keep_only_tags.append(dict(name='div', attrs={'class': 'print-submitted'}))
|
||||
keep_only_tags.append(dict(name='div', attrs={'class': 'print-content'}))
|
||||
keep_only_tags.append(dict(name='h2', attrs={'class': 'node-title'}))
|
||||
keep_only_tags.append(dict(name='div', attrs={'class': 'content clearfix'}))
|
||||
|
||||
remove_tags = []
|
||||
remove_tags.append(dict(name='table', attrs={'id': 'attachments'}))
|
||||
remove_tags.append(dict(name='div', attrs={'class': 'print-submitted'}))
|
||||
|
||||
feeds = [(u'OSW', u'http://www.osw.waw.pl/pl/rss.xml')]
|
||||
feeds = [(u'OSW', u'https://www.osw.waw.pl/pl/rss.xml')]
|
||||
|
||||
def print_version(self, url):
|
||||
return url.replace('http://www.osw.waw.pl/pl/', 'http://www.osw.waw.pl/pl/print/')
|
||||
return url.replace('https://www.osw.waw.pl/pl/', 'https://www.osw.waw.pl/pl/print/')
|
||||
|
@ -1,36 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class OSWorld(BasicNewsRecipe):
    """Recipe for OSWorld.pl that stitches paginated articles together."""

    title = u'OSWorld.pl'
    __author__ = 'fenuks'
    description = u'OSWorld.pl to serwis internetowy, dzięki któremu poznasz czym naprawdę jest Open Source. Serwis poświęcony jest wolnemu oprogramowaniu jak linux mint, centos czy ubunty. Znajdziecie u nasz artykuły, unity oraz informacje o certyfikatach CACert. OSWorld to mały świat wielkich systemów!'  # noqa
    category = 'OS, IT, open source, Linux'
    language = 'pl'
    cover_url = 'http://osworld.pl/wp-content/uploads/osworld-kwadrat-128x111.png'
    extra_css = 'img.alignleft {float: left; margin-right: 5px;}'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    use_embedded_content = False
    keep_only_tags = [dict(id=['dzial', 'posts'])]
    remove_tags = [dict(attrs={'class': 'post-comments'})]
    remove_tags_after = dict(attrs={'class': 'entry clr'})
    feeds = [(u'Artyku\u0142y', u'http://osworld.pl/category/artykuly/feed/'),
             (u'Nowe wersje', u'http://osworld.pl/category/nowe-wersje/feed/')]

    def append_page(self, soup, appendtag):
        """Append the content of every paginated follow-up page to ``appendtag``.

        Links are read from the element with id ``paginacja``; for each one
        the linked page is fetched and its ``entry clr`` element is added at
        the end of ``appendtag``. Pagination elements are removed afterwards.
        Links without ``href`` and pages without an ``entry clr`` element are
        skipped rather than inserting ``None`` into the tree.
        """
        if appendtag is None:
            # Page without a <body>: nothing to extend.
            return
        pager = appendtag.find(attrs={'id': 'paginacja'})
        if pager:
            for anchor in pager.findAll('a'):
                href = anchor.get('href')
                if not href:
                    continue
                next_soup = self.index_to_soup(href)
                pagetext = next_soup.find(attrs={'class': 'entry clr'})
                if pagetext is None:
                    continue
                appendtag.insert(len(appendtag.contents), pagetext)
        for leftover in appendtag.findAll(attrs={'id': 'paginacja'}):
            leftover.extract()

    def preprocess_html(self, soup):
        """Merge paginated content into the article body and return the soup."""
        self.append_page(soup, soup.body)
        return soup
|
@ -1,17 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class palmtop_pl(BasicNewsRecipe):
    """Recipe for Palmtop.pl that takes article content from the feed itself."""

    # --- publication metadata ---
    title = u'Palmtop.pl'
    __author__ = 'fenuks'
    description = 'wortal technologii mobilnych'
    category = 'mobile'
    language = 'pl'

    # One logo image serves as both cover and masthead.
    cover_url = 'http://cdn.g-point.biz/wp-content/themes/palmtop-new/images/header_palmtop_logo.png'
    masthead_url = cover_url

    # --- download settings ---
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = True

    feeds = [(u'Newsy', u'http://palmtop.pl/feed/atom/')]
|
@ -1,37 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class PC_Arena(BasicNewsRecipe):
    """Recipe for PCArena (pcarena.pl) using the print view of each article."""

    title = u'PCArena'
    oldest_article = 7
    max_articles_per_feed = 100
    __author__ = 'fenuks'
    description = u'Najnowsze informacje z branży IT - testy, recenzje, aktualności, rankingi, wywiady. Twoje źródło informacji o sprzęcie komputerowym.'
    category = 'IT'
    language = 'pl'
    index = 'http://pcarena.pl'
    masthead_url = 'http://pcarena.pl/pcarena/img/logo.png'
    cover_url = 'http://pcarena.pl/pcarena/img/logo.png'
    no_stylesheets = True
    remove_empty_feeds = True
    feeds = [
        (u'Aktualności', u'http://pcarena.pl/aktualnosci/feeds.rss'),
        (u'Testy', u'http://pcarena.pl/testy/feeds.rss'),
        (u'Software', u'http://pcarena.pl/oprogramowanie/feeds.rss'),
        (u'Poradniki', u'http://pcarena.pl/poradniki/feeds.rss'),
        (u'Mobile', u'http://pcarena.pl/mobile/feeds.rss')]

    def print_version(self, url):
        """Return the article URL with every 'show' replaced by 'print'."""
        return url.replace('show', 'print')

    def image_url_processor(self, baseurl, url):
        """Return ``url`` unchanged when absolute, otherwise prefix the site host.

        The scheme is checked at the start of the string, so a relative path
        that merely contains "http" somewhere is still made absolute.
        """
        if url.startswith(('http://', 'https://')):
            return url
        return 'http://pcarena.pl' + url

    def preprocess_html(self, soup):
        """Turn site-relative anchor targets into absolute URLs."""
        for a in soup('a'):
            # .get() avoids the deprecated Tag.has_key() and returns None
            # for anchors without an href attribute.
            href = a.get('href')
            if href is not None and not href.startswith(('http://', 'https://')):
                a['href'] = self.index + href
        return soup
|
@ -1,30 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class PC_Centre(BasicNewsRecipe):
    """Recipe for PC Centre (pccentre.pl) using the print view of each article."""

    title = u'PC Centre'
    oldest_article = 7
    max_articles_per_feed = 100
    __author__ = 'fenuks'
    description = u'Portal komputerowy, a w nim: testy sprzętu komputerowego, recenzje gier i oprogramowania. a także opisy produktów związanych z komputerami.'
    category = 'IT'
    language = 'pl'
    masthead_url = 'http://pccentre.pl/views/images/logo.gif'
    cover_url = 'http://pccentre.pl/views/images/logo.gif'
    no_stylesheets = True
    remove_empty_feeds = True
    ignore_duplicate_articles = {'title', 'url'}
    remove_tags = [dict(attrs={'class': 'logo_print'})]
    # The query separator must be a literal '&section='; an HTML-entity
    # decoding of '&sect' had turned it into the '§' character.
    feeds = [
        (u'Aktualno\u015bci', u'http://pccentre.pl/backend.php'),
        (u'Publikacje', u'http://pccentre.pl/backend.php?mode=a'),
        (u'Sprz\u0119t komputerowy', u'http://pccentre.pl/backend.php?mode=n&section=2'),
        (u'Oprogramowanie', u'http://pccentre.pl/backend.php?mode=n&section=3'),
        (u'Gry komputerowe i konsole', u'http://pccentre.pl/backend.php?mode=n&section=4'),
        (u'Internet', u'http://pccentre.pl/backend.php?mode=n&section=7'),
        (u'Bezpiecze\u0144stwo', u'http://pccentre.pl/backend.php?mode=n&section=5'),
        (u'Multimedia', u'http://pccentre.pl/backend.php?mode=n&section=6'),
        (u'Biznes', u'http://pccentre.pl/backend.php?mode=n&section=9')]

    def print_version(self, url):
        """Return the article URL with every 'show' replaced by 'print'."""
        return url.replace('show', 'print')
|
@ -75,9 +75,7 @@ class PCLab(BasicNewsRecipe):
|
||||
href = link.get('href', None)
|
||||
if href and href.startswith('/'):
|
||||
link['href'] = 'http://pclab.pl' + href
|
||||
# finally remove some tags
|
||||
# for r in soup.findAll('div', attrs={'class':['tags', 'index',
|
||||
# 'script_bxad_slot_display_list_bxad_slot', 'index first', 'zumi',
|
||||
# 'navigation']})
|
||||
for r in soup.findAll(name='a', href=re.compile(r'^https://www.skapiec.pl/')):
|
||||
r.extract()
|
||||
|
||||
return soup
|
||||
|
@ -1,36 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Pikoboard(BasicNewsRecipe):
    """Recipe for Pikoboard.pl that stitches paginated articles together."""

    title = u'Pikoboard.pl'
    __author__ = 'fenuks'
    description = u'Portal poświęcony takim urządzeniom jak: Raspberry Pi, XBMC, ODROID-X, BeagleBoard czy CuBox. Systemy operacyjne, modyfikacje oraz obudowy i innego rodzaju dodatki.'  # noqa
    category = 'IT, open source, Linux, Raspberry Pi'
    language = 'pl'
    cover_url = 'http://picoboard.pl/wp-content/themes/portal/img/logo.jpg'
    extra_css = 'img.alignleft {float: left; margin-right: 5px;}'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    use_embedded_content = False
    keep_only_tags = [dict(id=['dzial', 'posts'])]
    remove_tags = [dict(attrs={'class': 'post-comments'})]
    remove_tags_after = dict(attrs={'class': 'entry clr'})
    feeds = [(u'Newsy', u'http://picoboard.pl/feed/atom/'),
             (u'Artyku\u0142y', u'http://picoboard.pl/category/artykuly/feed/')]

    def append_page(self, soup, appendtag):
        """Append the content of every paginated follow-up page to ``appendtag``.

        Links are read from the element with id ``paginacja``; for each one
        the linked page is fetched and its ``entry clr`` element is added at
        the end of ``appendtag``. Pagination elements are removed afterwards.
        Links without ``href`` and pages without an ``entry clr`` element are
        skipped rather than inserting ``None`` into the tree.
        """
        if appendtag is None:
            # Page without a <body>: nothing to extend.
            return
        pager = appendtag.find(attrs={'id': 'paginacja'})
        if pager:
            for anchor in pager.findAll('a'):
                href = anchor.get('href')
                if not href:
                    continue
                next_soup = self.index_to_soup(href)
                pagetext = next_soup.find(attrs={'class': 'entry clr'})
                if pagetext is None:
                    continue
                appendtag.insert(len(appendtag.contents), pagetext)
        for leftover in appendtag.findAll(attrs={'id': 'paginacja'}):
            leftover.extract()

    def preprocess_html(self, soup):
        """Merge paginated content into the article body and return the soup."""
        self.append_page(soup, soup.body)
        return soup
|
@ -1,42 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class PolskaTimes(BasicNewsRecipe):
    """Recipe for Polska Times using the print view of each article."""

    title = u'Polska Times'
    __author__ = 'fenuks'
    description = u'Internetowe wydanie dziennika ogólnopolskiego Polska The Times. Najświeższe informacje: wydarzenia w kraju i na świecie, reportaże, poradniki, opinie.'  # noqa
    category = 'newspaper'
    language = 'pl'
    masthead_url = 'http://s.polskatimes.pl/g/logo_naglowek/polska.gif?17'
    oldest_article = 7
    encoding = 'iso-8859-2'
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True
    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}
    remove_tags_after = dict(attrs={'src': 'http://nm.dz.com.pl/dz.png'})
    remove_tags = [dict(id='mat-podobne'), dict(name='a', attrs={
        'class': 'czytajDalej'}), dict(attrs={'src': 'http://nm.dz.com.pl/dz.png'})]
    feeds = [
        (u'Fakty', u'http://polskatimes.feedsportal.com/c/32980/f/533648/index.rss'),
        (u'Opinie', u'http://www.polskatimes.pl/rss/opinie.xml'),
        (u'Sport', u'http://polskatimes.feedsportal.com/c/32980/f/533649/index.rss'),
        (u'Pieni\u0105dze', u'http://polskatimes.feedsportal.com/c/32980/f/533657/index.rss'),
        (u'Twoje finanse', u'http://www.polskatimes.pl/rss/twojefinanse.xml'),
        (u'Kultura', u'http://polskatimes.feedsportal.com/c/32980/f/533650/index.rss'),
        (u'Dodatki', u'http://www.polskatimes.pl/rss/dodatki.xml')]

    def print_version(self, url):
        """Return the article URL with every 'artykul' replaced by 'drukuj'."""
        return url.replace('artykul', 'drukuj')

    def skip_ad_pages(self, soup):
        """Follow the first link of an interstitial page titled 'Advertisement'.

        Returns the raw content of the linked page, or ``None`` (implicitly)
        when the page is not an interstitial or has no usable link.
        """
        title = soup.title
        # A page without <title> used to raise TypeError on the `in` test.
        title_text = title.string if title is not None else None
        if title_text and 'Advertisement' in title_text:
            anchor = soup.find('a')
            nexturl = anchor.get('href') if anchor is not None else None
            if nexturl:
                return self.index_to_soup(nexturl, raw=True)

    def get_cover_url(self):
        """Return the cover image URL scraped from the prasa24.pl listing.

        Falls back to the previously known ``cover_url`` (or ``None``) when
        the ``pojemnik`` element or its image is missing.
        """
        soup = self.index_to_soup(
            'http://www.prasa24.pl/gazeta/metropolia-warszawska/')
        container = soup.find(id='pojemnik')
        img = container.img if container is not None else None
        src = img.get('src') if img is not None else None
        if not src:
            return getattr(self, 'cover_url', None)
        self.cover_url = src
        return self.cover_url
|
@ -21,7 +21,8 @@ class Polter(BasicNewsRecipe):
|
||||
ignore_duplicate_articles = {'title', 'url'}
|
||||
|
||||
keep_only_tags = [dict(attrs={'class': 'boxcontent'})]
|
||||
remove_tags = [dict(id='komentarze')]
|
||||
remove_tags = [dict(id='komentarze'),
|
||||
dict(name='div',attrs={'class':'ostatnieArtykuly'})]
|
||||
remove_tags_after = dict(id='komentarze')
|
||||
|
||||
feeds = [
|
||||
@ -36,8 +37,7 @@ class Polter(BasicNewsRecipe):
|
||||
(u'Gry planszowe', 'http://planszowki.polter.pl/wiesci,rss.html'),
|
||||
(u'Gry PC', 'http://gry.polter.pl/wiesci,rss.html'),
|
||||
(u'Gry konsolowe', 'http://konsole.polter.pl/wiesci,rss.html'),
|
||||
(u'Konwenty', 'http://konwenty.polter.pl/wiesci,rss.html'),
|
||||
(u'Blogi', 'http://polter.pl/blogi,rss.html')]
|
||||
(u'Konwenty', 'http://konwenty.polter.pl/wiesci,rss.html')]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for s in soup.findAll(attrs={'style': re.compile('float: ?left')}):
|
||||
@ -65,3 +65,6 @@ class Polter(BasicNewsRecipe):
|
||||
for r in soup.findAll(name='a', href=re.compile(r'^http://www.ceneo.pl/')):
|
||||
r.extract()
|
||||
return soup
|
||||
|
||||
def preprocess_raw_html(self, raw_html, url):
|
||||
return raw_html.replace('<br /><br /><h3>Czytaj również</h3>', '')
|
||||
|
@ -1,63 +0,0 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class PoradniaPWN(BasicNewsRecipe):
    """Recipe for the PWN language advice service (poradnia.pwn.pl)."""

    title = u'Poradnia Językowa PWN'
    __author__ = 'fenuks'
    description = u'Internetowa poradnia językowa Wydawnictwa Naukowego PWN. Poradnię prowadzi Redaktor Naczelny Słowników Języka Polskiego, prof. Mirosław Bańko. Pomagają mu eksperci - znani polscy językoznawcy. Współpracuje z nami m.in. prof. Jerzy Bralczyk oraz dr Jan Grzenia.'  # noqa
    category = 'language'
    language = 'pl'
    oldest_article = 14
    max_articles_per_feed = 100000
    INDEX = "http://poradnia.pwn.pl/"
    no_stylesheets = True
    remove_attributes = ['style']
    remove_javascript = True
    use_embedded_content = False
    keep_only_tags = [dict(name="div", attrs={"class": "searchhi"})]
    feeds = [(u'Poradnia', u'http://rss.pwn.pl/poradnia.rss')]

    # NOTE: a disabled find_articles()/parse_index() crawler of lista.php used
    # to live here inside a string literal; it was never executed and has been
    # dropped. The RSS feed above is the only article source.

    def preprocess_html(self, soup):
        """Flatten lists into divs and make relative links absolute.

        Every ``ul``/``li`` element is renamed to ``div``. Anchors whose
        ``href`` does not start with ``http`` are prefixed with ``INDEX``;
        anchors without an ``href`` attribute are left untouched instead of
        raising ``KeyError``.
        """
        for i in soup.findAll(name=['ul', 'li']):
            i.name = "div"
        for z in soup.findAll(name='a'):
            href = z.get('href')
            if href is None:
                continue
            if not href.startswith('http'):
                z['href'] = self.INDEX + href
        return soup
|
@ -29,9 +29,3 @@ class ppeRecipe(BasicNewsRecipe):
|
||||
('Recenzje', 'http://ppe.pl/rss-recenzje.html'),
|
||||
('Publicystyka', 'http://ppe.pl/rss-publicystyka.html'),
|
||||
]
|
||||
|
||||
def get_cover_url(self):
|
||||
soup = self.index_to_soup('http://www.ppe.pl/psx_extreme.html')
|
||||
part = soup.find(attrs={'class': 'archiwum-foto'})['style']
|
||||
part = re.search("'(.+)'", part).group(1).replace('_min', '')
|
||||
return 'http://www.ppe.pl' + part
|
||||
|
@ -1,43 +0,0 @@
|
||||
#!/usr/bin/env python2
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'teepel <teepel44@gmail.com>'
|
||||
|
||||
'''
|
||||
http://prawica.net
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class prawica_recipe(BasicNewsRecipe):
    """Recipe for prawica.net that downloads the print view of each article."""

    # --- publication metadata ---
    title = u'prawica.net'
    __author__ = 'teepel <teepel44@gmail.com>'
    language = 'pl'
    description = 'Wiadomości ze strony prawica.net'
    INDEX = 'http://prawica.net/'

    # --- download settings ---
    remove_empty_feeds = True
    oldest_article = 1
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True

    feeds = [(u'all', u'http://prawica.net/all/feed')]

    keep_only_tags = [
        # NOTE: meant to keep the article title; per the original author's
        # remark this entry does not actually make the title show up.
        dict(name='h1', attrs={'class': 'print-title'}),
        dict(name='div', attrs={'class': 'content'}),
    ]

    remove_tags = [
        dict(name='div', attrs={
            'class': 'field field-type-viewfield field-field-autor2'}),
        dict(name='div', attrs={
            'class': 'field field-type-viewfield field-field-publikacje-autora'}),
        dict(name='div', attrs={
            'id': 'rate-widget-2 rate-widget clear-block rate-average rate-widget-fivestar rate-daa7512627f21dcf15e0af47e5279f0e rate-processed'}),
    ]
    remove_tags_after = [
        dict(name='div', attrs={'class': 'field-label-inline-first'}),
    ]

    def print_version(self, url):
        """Map an article URL onto its /print/ counterpart."""
        site = 'http://prawica.net/'
        return url.replace(site, site + 'print/')
|
@ -1,34 +0,0 @@
|
||||
#!/usr/bin/env python2
|
||||
|
||||
'''
|
||||
www.presseurop.eu/pl
|
||||
'''
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'teepel <teepel44@gmail.com>'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
|
||||
class presseurop(BasicNewsRecipe):
    """Recipe for the Polish edition of Presseurop, built from its topic feeds."""

    title = u'Presseurop'
    description = u'Najlepsze artykuły z prasy europejskiej'
    language = 'pl'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True
    remove_empty_feeds = True

    feeds = [
        (u'Polityka', u'http://www.presseurop.eu/pl/taxonomy/term/1/%2A/feed'),
        (u'Społeczeństwo', u'http://www.presseurop.eu/pl/taxonomy/term/2/%2A/feed'),
        (u'Gospodarka', u'http://www.presseurop.eu/pl/taxonomy/term/3/%2A/feed'),
        (u'Kultura i debaty', u'http://www.presseurop.eu/pl/taxonomy/term/4/%2A/feed'),
        (u'UE i Świat', u'http://www.presseurop.eu/pl/taxonomy/term/5/%2A/feed')
    ]

    # Strip everything from the first '|' in the page title up to the closing
    # tag. '[^<]*' keeps the match inside the title text; the previous greedy
    # '.*' with DOTALL could run from any '|' to the last '</title>' in the
    # document and delete markup in between.
    preprocess_regexps = [
        (re.compile(r'\|[^<]*</title>', re.IGNORECASE),
         lambda match: '</title>'),
    ]
|