mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Various Polish news sources by fenuks
This commit is contained in:
parent
66ea17fe86
commit
8025977447
21
recipes/archeowiesci.recipe
Normal file
21
recipes/archeowiesci.recipe
Normal file
@ -0,0 +1,21 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Archeowiesci(BasicNewsRecipe):
    """Recipe for the archeowiesci.pl RSS feed.

    Articles whose title contains 'subskrypcja' are dropped after the
    feeds have been parsed.
    """

    title = u'Archeowiesci'
    __author__ = 'fenuks'
    category = 'archeology'
    language = 'pl'
    cover_url = 'http://archeowiesci.pl/wp-content/uploads/2011/05/Archeowiesci2-115x115.jpg'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True
    remove_tags = [
        dict(name='span', attrs={'class': ['post-ratings', 'post-ratings-loading']}),
    ]
    feeds = [(u'Archeowieści', u'http://archeowiesci.pl/feed/')]

    def parse_feeds(self):
        """Return the parsed feeds without the 'subskrypcja' articles."""
        parsed = BasicNewsRecipe.parse_feeds(self)
        for feed in parsed:
            # Collect first, remove afterwards, so the list is never
            # mutated while it is being walked.
            unwanted = [entry for entry in feed.articles
                        if 'subskrypcja' in entry.title]
            for entry in unwanted:
                feed.articles.remove(entry)
        return parsed
|
23
recipes/eioba.recipe
Normal file
23
recipes/eioba.recipe
Normal file
@ -0,0 +1,23 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class eioba(BasicNewsRecipe):
    """Recipe for the eioba article feeds (one feed per site category)."""

    title = u'eioba'
    __author__ = 'fenuks'
    cover_url = 'http://www.eioba.org/lay/logo_pl_v3.png'
    language = 'pl'
    oldest_article = 7
    remove_empty_feeds = True
    max_articles_per_feed = 100
    # Style the element with id ctl0_body_Topic, which is one of the two
    # elements kept by keep_only_tags below.
    extra_css = '#ctl0_body_Topic {font-weight: bold; font-size:30px;}'
    keep_only_tags = [dict(id=['ctl0_body_Topic', 'articleContent'])]
    # Feed titles are shown to the reader; 'Wszystkie' and u'Różne' were
    # previously misspelled as 'Wszyskie' and u'Rożne'.
    feeds = [
        (u'Wszystkie kategorie', u'http://feeds.eioba.pl/eioba-pl-top'),
        (u'Technologia', u'http://www.eioba.pl/feed/categories/1.xml'),
        (u'Nauka', u'http://www.eioba.pl/feed/categories/12.xml'),
        (u'Finanse', u'http://www.eioba.pl/feed/categories/7.xml'),
        (u'Życie', u'http://www.eioba.pl/feed/categories/5.xml'),
        (u'Zainteresowania', u'http://www.eioba.pl/feed/categories/420.xml'),
        (u'Społeczeństwo', u'http://www.eioba.pl/feed/categories/8.xml'),
        (u'Rozrywka', u'http://www.eioba.pl/feed/categories/10.xml'),
        (u'Różne', u'http://www.eioba.pl/feed/categories/9.xml'),
    ]
|
66
recipes/focus_pl.recipe
Normal file
66
recipes/focus_pl.recipe
Normal file
@ -0,0 +1,66 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Focus_pl(BasicNewsRecipe):
    """Recipe for the Focus.pl feeds.

    The feed links are replaced by a 'do-druku/1/' URL in skip_ad_pages,
    and multi-page articles are stitched together in preprocess_html via
    append_page.
    """

    title = u'Focus.pl'
    oldest_article = 15
    max_articles_per_feed = 100
    __author__ = 'fenuks'
    language = 'pl'
    description = 'polish scientific monthly magazine'
    category = 'magazine'
    cover_url = ''
    remove_empty_feeds = True
    no_stylesheets = True
    remove_tags_before = dict(name='div', attrs={'class': 'h2 h2f'})
    remove_tags_after = dict(name='div', attrs={'class': 'clear'})
    feeds = [
        (u'Wszystkie kategorie', u'http://focus.pl.feedsportal.com/c/32992/f/532692/index.rss'),
        (u'Nauka', u'http://focus.pl.feedsportal.com/c/32992/f/532693/index.rss'),
        (u'Historia', u'http://focus.pl.feedsportal.com/c/32992/f/532694/index.rss'),
        (u'Cywilizacja', u'http://focus.pl.feedsportal.com/c/32992/f/532695/index.rss'),
        (u'Sport', u'http://focus.pl.feedsportal.com/c/32992/f/532696/index.rss'),
        (u'Technika', u'http://focus.pl.feedsportal.com/c/32992/f/532697/index.rss'),
        (u'Przyroda', u'http://focus.pl.feedsportal.com/c/32992/f/532698/index.rss'),
        (u'Technologie', u'http://focus.pl.feedsportal.com/c/32992/f/532699/index.rss'),
        (u'Warto wiedzieć', u'http://focus.pl.feedsportal.com/c/32992/f/532700/index.rss'),
    ]

    def skip_ad_pages(self, soup):
        """Return the raw 'do-druku/1/' page reached from the first link.

        Returns None when the page has no anchor or the first anchor
        carries no href, so a link-less page no longer raises KeyError.
        """
        tag = soup.find(name='a')
        href = tag.get('href') if tag is not None else None
        if not href:
            return None
        return self.index_to_soup(href + 'do-druku/1/', raw=True)

    def append_page(self, appendtag):
        """Append the text of every follow-up page to *appendtag*.

        Follow-up pages are found through the anchors inside the
        'arrows' div; 'klik-nav' divs are removed from every page.
        Missing markup ends the walk instead of raising.
        """
        arrows = appendtag.find(name='div', attrs={'class': 'arrows'})
        if arrows is None:
            return

        # On the first page the first anchor of the arrows div is taken
        # as the link to the next page.
        first_link = arrows.a
        first_href = first_link.get('href') if first_link is not None else None
        nexturl = 'http://www.focus.pl/' + first_href if first_href else None

        for nav in appendtag.findAll(name='div', attrs={'class': 'klik-nav'}):
            nav.extract()

        visited = set()  # guards against a pagination cycle
        while nexturl and nexturl not in visited:
            visited.add(nexturl)
            soup2 = self.index_to_soup(nexturl)
            nexturl = None

            pagetext = soup2.find(name='div', attrs={'class': 'txt'})
            if pagetext is None:
                break

            arrows = pagetext.find(name='div', attrs={'class': 'arrows'})
            if arrows is not None:
                for link in arrows.findAll(name='a'):
                    # .string is None when the anchor wraps nested markup.
                    label = link.string
                    if label and u'Następne' in label and link.get('href'):
                        nexturl = 'http://www.focus.pl/' + link['href']

            for nav in pagetext.findAll(name='div', attrs={'class': 'klik-nav'}):
                nav.extract()
            appendtag.insert(len(appendtag.contents), pagetext)

    def get_cover_url(self):
        """Return the cover URL scraped from the magazine page.

        Falls back to the class-level cover_url when the expected markup
        is not found.
        """
        soup = self.index_to_soup('http://www.focus.pl/magazyn/')
        tag = soup.find(name='div', attrs={'class': 'clr fl'})
        link = tag.a if tag is not None else None
        href = link.get('href') if link is not None else None
        if href:
            # NOTE(review): the anchor's href is used as the cover image
            # URL; confirm it points at an image rather than a page.
            self.cover_url = 'http://www.focus.pl/' + href
        return self.cover_url

    def preprocess_html(self, soup):
        """Merge follow-up pages into the article body and return *soup*."""
        self.append_page(soup.body)
        return soup
|
83
recipes/gazeta_wyborcza.recipe
Normal file
83
recipes/gazeta_wyborcza.recipe
Normal file
@ -0,0 +1,83 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Gazeta_Wyborcza(BasicNewsRecipe):
    """Recipe for the Gazeta Wyborcza feeds.

    Multi-page articles and galleries are merged into a single document
    in preprocess_html.
    """

    title = u'Gazeta Wyborcza'
    __author__ = 'fenuks'
    cover_url = 'http://bi.gazeta.pl/im/5/10285/z10285445AA.jpg'
    language = 'pl'
    description = 'news from gazeta.pl'
    category = 'newspaper'
    INDEX = 'http://wyborcza.pl'
    remove_empty_feeds = True
    oldest_article = 3
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    remove_tags_before = dict(id='k0')
    remove_tags_after = dict(id='banP4')
    remove_tags = [
        dict(name='div', attrs={'class': 'rel_box'}),
        dict(attrs={'class': ['date', 'zdjP', 'zdjM', 'pollCont', 'rel_video', 'brand', 'txt_upl']}),
        dict(name='div', attrs={'id': 'footer'}),
    ]
    feeds = [
        (u'Kraj', u'http://rss.feedsportal.com/c/32739/f/530266/index.rss'),
        (u'\u015awiat', u'http://rss.feedsportal.com/c/32739/f/530270/index.rss'),
        (u'Wyborcza.biz', u'http://wyborcza.biz/pub/rss/wyborcza_biz_wiadomosci.htm'),
        (u'Komentarze', u'http://rss.feedsportal.com/c/32739/f/530312/index.rss'),
        (u'Kultura', u'http://rss.gazeta.pl/pub/rss/gazetawyborcza_kultura.xml'),
        (u'Nauka', u'http://rss.feedsportal.com/c/32739/f/530269/index.rss'),
        (u'Opinie', u'http://rss.gazeta.pl/pub/rss/opinie.xml'),
        (u'Gazeta \u015awi\u0105teczna', u'http://rss.feedsportal.com/c/32739/f/530431/index.rss'),
        (u'Du\u017cy Format', u'http://rss.feedsportal.com/c/32739/f/530265/index.rss'),
        (u'Witamy w Polsce', u'http://rss.feedsportal.com/c/32739/f/530476/index.rss'),
        (u'M\u0119ska Muzyka', u'http://rss.feedsportal.com/c/32739/f/530337/index.rss'),
        (u'Lata Lec\u0105', u'http://rss.feedsportal.com/c/32739/f/530326/index.rss'),
        (u'Solidarni z Tybetem', u'http://rss.feedsportal.com/c/32739/f/530461/index.rss'),
        (u'W pon. - \u017bakowski', u'http://rss.feedsportal.com/c/32739/f/530491/index.rss'),
        (u'We wt. - Kolenda-Zalewska', u'http://rss.feedsportal.com/c/32739/f/530310/index.rss'),
        (u'\u015aroda w \u015brod\u0119', u'http://rss.feedsportal.com/c/32739/f/530428/index.rss'),
        (u'W pi\u0105tek - Olejnik', u'http://rss.feedsportal.com/c/32739/f/530364/index.rss'),
        (u'Nekrologi', u'http://rss.feedsportal.com/c/32739/f/530358/index.rss'),
    ]

    def skip_ad_pages(self, soup):
        """Return the raw page behind the first 'btn' link, if any.

        Returns None when there is no such anchor or it has no href.
        """
        tag = soup.find(name='a', attrs={'class': 'btn'})
        href = tag.get('href') if tag is not None else None
        if not href:
            return None
        return self.index_to_soup(href, raw=True)

    def append_page(self, soup, appendtag):
        """Append the 'artykul' element of each follow-up page to *appendtag*.

        The pager is the div with id 'Str'; the link whose text contains
        u'następne' leads to the next page. The pager and the element
        with id 'source' are removed from *appendtag*.
        """
        links = []
        body_pager = appendtag.find('div', attrs={'id': 'Str'})
        if body_pager is not None:
            pager = soup.find('div', attrs={'id': 'Str'})
            if pager is None:
                pager = body_pager
            links = pager.findAll('a')
            body_pager.extract()

        source = appendtag.find(id='source')
        if source is not None:
            source.extract()

        visited = set()  # guards against a pagination cycle
        while links:
            next_href = None
            for link in links:
                # .string is None when the anchor wraps nested markup.
                label = link.string
                if label and u'następne' in label and link.get('href'):
                    next_href = link['href']
                    # Stop at the first match: a second "next" link on the
                    # same page would otherwise append the page twice.
                    break
            links = []
            if next_href is None:
                break

            url = self.INDEX + next_href
            if url in visited:
                break
            visited.add(url)

            soup2 = self.index_to_soup(url)
            pagetext = soup2.find(id='artykul')
            # Look the pager up before pagetext is moved out of soup2.
            pager = soup2.find('div', attrs={'id': 'Str'})
            if pagetext is not None:
                appendtag.insert(len(appendtag.contents), pagetext)
            if pager is not None:
                links = pager.findAll('a')

    def _gallery_next_url(self, container):
        """Return the href of the 'gal_btn_next' link in *container*, or None."""
        button = container.find(id='gal_btn_next')
        if button is None or button.a is None:
            return None
        return button.a.get('href')

    def gallery_article(self, appendtag):
        """Append every following gallery page to *appendtag*.

        Does nothing unless *appendtag* holds an element with id
        'container_gal'. The 'gal_navi' element is removed after each
        page is merged.
        """
        if appendtag.find(id='container_gal') is None:
            return

        # Read the next link before the navigation element is removed.
        nexturl = self._gallery_next_url(appendtag)
        navi = appendtag.find(id='gal_navi')
        if navi is not None:
            navi.extract()

        visited = set()  # guards against a gallery that wraps around
        while nexturl and nexturl not in visited:
            visited.add(nexturl)
            soup2 = self.index_to_soup(nexturl)
            pagetext = soup2.find(id='container_gal')
            if pagetext is None:
                break
            nexturl = self._gallery_next_url(pagetext)
            appendtag.insert(len(appendtag.contents), pagetext)
            navi = appendtag.find(id='gal_navi')
            if navi is not None:
                navi.extract()

    def preprocess_html(self, soup):
        """Merge follow-up pages and gallery pages, then return *soup*."""
        self.append_page(soup, soup.body)
        if soup.find(id='container_gal'):
            self.gallery_article(soup.body)
        return soup

    def print_version(self, url):
        """Rewrite wyborcza.biz 'biznes' URLs; return other URLs unchanged."""
        if 'http://wyborcza.biz/biznes/' not in url:
            return url
        return url.replace('http://wyborcza.biz/biznes/1',
                           'http://wyborcza.biz/biznes/2029020')
|
BIN
recipes/icons/archeowiesci.png
Normal file
BIN
recipes/icons/archeowiesci.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 718 B |
BIN
recipes/icons/eioba.png
Normal file
BIN
recipes/icons/eioba.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 908 B |
BIN
recipes/icons/focus_pl.png
Normal file
BIN
recipes/icons/focus_pl.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 695 B |
BIN
recipes/icons/gazeta_wyborcza.png
Normal file
BIN
recipes/icons/gazeta_wyborcza.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 221 B |
BIN
recipes/icons/konflikty_zbrojne.png
Normal file
BIN
recipes/icons/konflikty_zbrojne.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 320 B |
15
recipes/konflikty_zbrojne.recipe
Normal file
15
recipes/konflikty_zbrojne.recipe
Normal file
@ -0,0 +1,15 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Konflikty(BasicNewsRecipe):
    """Recipe for the konflikty.pl RSS feeds, relying on auto_cleanup."""

    title = u'Konflikty Zbrojne'
    __author__ = 'fenuks'
    description = 'military news'
    category = 'military, history'
    language = 'pl'
    cover_url = 'http://www.konflikty.pl/images/tapety_logo.jpg'

    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True

    feeds = [
        (u'Aktualności', u'http://www.konflikty.pl/rss_aktualnosci_10.xml'),
        (u'Artyku\u0142y', u'http://www.konflikty.pl/rss_artykuly_10.xml'),
        (u'Relacje', u'http://www.konflikty.pl/rss_relacje_10.xml'),
        (u'Recenzje', u'http://www.konflikty.pl/rss_recenzje_10.xml'),
    ]
|
15
recipes/naczytniki.recipe
Normal file
15
recipes/naczytniki.recipe
Normal file
@ -0,0 +1,15 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class naczytniki(BasicNewsRecipe):
    """Recipe for the naczytniki.pl RSS feed."""

    title = u'naczytniki.pl'
    __author__ = 'fenuks'
    description = 'everything about e-readers'
    category = 'readers'
    language = 'pl'
    cover_url = 'http://naczytniki.pl/wp-content/uploads/2010/08/logo_nc28.png'

    oldest_article = 7
    max_articles_per_feed = 100

    # Keep the 'post' div, cut at the 'sociable' div, and remove the
    # 'sociable' div and the 'comments' span themselves.
    keep_only_tags = [dict(name='div', attrs={'class': 'post'})]
    remove_tags_after = dict(name='div', attrs={'class': 'sociable'})
    remove_tags = [
        dict(name='span', attrs={'class': 'comments'}),
        dict(name='div', attrs={'class': 'sociable'}),
    ]

    feeds = [(u'Wpisy', u'http://naczytniki.pl/?feed=rss2')]
|
47
recipes/nowa_fantastyka.recipe
Normal file
47
recipes/nowa_fantastyka.recipe
Normal file
@ -0,0 +1,47 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Nowa_Fantastyka(BasicNewsRecipe):
    """Recipe for fantastyka.pl.

    The article index is built by scraping three listing pages in
    parse_index; no RSS feeds are used.
    """

    title = u'Nowa Fantastyka'
    oldest_article = 7
    __author__ = 'fenuks'
    language = 'pl'
    description = 'site for fantasy readers'
    category = 'fantasy'
    max_articles_per_feed = 100
    INDEX = 'http://www.fantastyka.pl/'
    remove_tags_before = dict(attrs={'class': 'belka1-tlo-md'})
    # Alternative cut-off point, kept for reference:
    # remove_tags_after=dict(name='span', attrs={'class':'naglowek-oceny'})
    remove_tags_after = dict(name='td', attrs={'class': 'belka1-bot'})
    remove_tags = [dict(attrs={'class': 'avatar2'})]
    feeds = []

    def find_articles(self, url):
        """Return article dicts for every 'a-box' link on the page at *url*.

        Each dict has the keys 'title', 'url', 'date' and 'description';
        the last two are always empty strings. An empty list is returned
        when the 'belka1-tlo-m' container is missing, and links without
        an href are skipped.
        """
        articles = []
        soup = self.index_to_soup(url)
        container = soup.find(attrs={'class': 'belka1-tlo-m'})
        if container is None:
            return articles

        for link in container.findAll(name='a', attrs={'class': 'a-box'}):
            href = link.get('href')
            if not href:
                continue
            # .string is None when the anchor wraps nested markup; fall
            # back to the concatenated text so the title is never None.
            title = link.string or u''.join(link.findAll(text=True))
            # date=soup.find(id='footer').ul.li.string[41:-1]
            articles.append({
                'title': title,
                'url': self.INDEX + href,
                'date': '',
                'description': '',
            })
        return articles

    def parse_index(self):
        """Return (section title, articles) pairs for the three sections."""
        feeds = []
        feeds.append((u"Opowiadania", self.find_articles('http://www.fantastyka.pl/3.html')))
        feeds.append((u"Publicystyka", self.find_articles('http://www.fantastyka.pl/6.html')))
        feeds.append((u"Hype Park", self.find_articles('http://www.fantastyka.pl/9.html')))
        return feeds

    def get_cover_url(self):
        """Return the cover URL taken from the 'okladka' image.

        When the image (or its src) is missing, the existing cover_url
        attribute is returned unchanged instead of raising.
        """
        soup = self.index_to_soup('http://www.fantastyka.pl/1.html')
        cover = soup.find(name='img', attrs={'class': 'okladka'})
        src = cover.get('src') if cover is not None else None
        if src:
            self.cover_url = self.INDEX + src
        return getattr(self, 'cover_url', None)
|
Loading…
x
Reference in New Issue
Block a user