Various new and updated Polish news sources

This commit is contained in:
Kovid Goyal 2013-03-10 19:22:22 +05:30
commit a2e1776d76
14 changed files with 131 additions and 20 deletions

View File

@ -1,7 +1,7 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Archeowiesci(BasicNewsRecipe):
title = u'Archeowiesci'
title = u'Archeowieści'
__author__ = 'fenuks'
category = 'archeology'
language = 'pl'

View File

@ -0,0 +1,43 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = u'Łukasz Grąbczewski 2013'
__version__ = '1.0'
'''
bachormagazyn.pl
'''
from calibre.web.feeds.news import BasicNewsRecipe
class bachormagazyn(BasicNewsRecipe):
    """Calibre news recipe for the magazine Bachor Magazyn (bachormagazyn.pl)."""

    # Spelling corrected to agree with the name given in __copyright__.
    __author__ = u'Łukasz Grąbczewski'
    title = u'Bachor Magazyn'
    description = u'Alternatywny magazyn o alternatywach rodzicielstwa'
    language = 'pl'
    publisher = 'Bachor Mag.'
    publication_type = 'magazine'
    masthead_url = 'http://bachormagazyn.pl/wp-content/uploads/2011/10/bachor_header1.gif'

    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False
    remove_empty_feeds = True

    oldest_article = 32  # monthly + 1
    max_articles_per_feed = 100

    feeds = [
        (u'Bezradnik dla nieudacznych rodziców', u'http://bachormagazyn.pl/feed/'),
    ]

    # Only the element with id "content" is kept from each article page.
    keep_only_tags = [
        dict(name='div', attrs={'id': 'content'}),
    ]

    # Elements stripped from the kept content, matched by id or class.
    remove_tags = [
        dict(attrs={'id': 'nav-above'}),
        dict(attrs={'id': 'nav-below'}),
        dict(attrs={'id': 'comments'}),
        dict(attrs={'class': 'entry-info'}),
        dict(attrs={'class': 'comments-link'}),
        dict(attrs={'class': 'sharedaddy sd-sharing-enabled'}),
    ]

View File

@ -18,6 +18,7 @@ class FrazPC(BasicNewsRecipe):
max_articles_per_feed = 100
use_embedded_content = False
no_stylesheets = True
remove_empty_feeds = True
cover_url='http://www.frazpc.pl/images/logo.png'
feeds = [
(u'Aktualno\u015bci', u'http://www.frazpc.pl/feed/aktualnosci'),

BIN
recipes/icons/gildia_pl.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.5 KiB

BIN
recipes/icons/nowy_obywatel.png Executable file

Binary file not shown.

After

Width:  |  Height:  |  Size: 480 B

View File

@ -16,6 +16,7 @@ class KopalniaWiedzy(BasicNewsRecipe):
max_articles_per_feed = 100
INDEX = u'http://kopalniawiedzy.pl/'
remove_javascript = True
remove_empty_feeds = True
no_stylesheets = True
remove_tags = [{'name':'p', 'attrs': {'class': 'keywords'}}, {'name':'div', 'attrs': {'class':'sexy-bookmarks sexy-bookmarks-bg-caring'}}, {'name':'div', 'attrs': {'class':'article-time-and-cat'}}, {'name':'p', 'attrs': {'class':'tags'}}]

View File

@ -3,7 +3,7 @@ from calibre.ebooks.BeautifulSoup import BeautifulSoup as bs
class KurierGalicyjski(BasicNewsRecipe):
title = u'Kurier Galicyjski'
__author__ = 'fenuks'
#description = u''
description = u'Kurier Galicyjski - największa gazeta dla Polaków na Ukrainie. Bieżące wydarzenia z życia polskiej mniejszości, historia, kultura, polityka, reportaże.'
category = 'news'
language = 'pl'
cover_url = 'http://www.duszki.pl/Kurier_galicyjski_bis2_small.gif'

View File

@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class KurierSzczecinski(BasicNewsRecipe):
title = u'Kurier Szczeci\u0144ski'
__author__ = 'fenuks'
description = u'24Kurier jest portalem Kuriera Szczecińskiego. Zawiera aktualności ze Szczecina oraz wiadomości regionalne z województwa zachodniopomorskiego. '
description = u'24Kurier jest portalem Kuriera Szczecińskiego. Zawiera aktualności ze Szczecina oraz wiadomości regionalne z województwa zachodniopomorskiego.'
category = 'newspaper'
#publication_type = ''
language = 'pl'

View File

@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Niebezpiecznik_pl(BasicNewsRecipe):
title = u'Niebezpiecznik.pl'
__author__ = 'fenuks'
description = 'Niebezpiecznik.pl'
description = u'Niebezpiecznik.pl o bezpieczeństwie i nie...'
category = 'hacking, IT'
language = 'pl'
oldest_article = 8

View File

@ -0,0 +1,42 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = u'Łukasz Grąbczewski 2013'
__version__ = '1.0'
'''
nowyobywatel.pl
'''
from calibre.web.feeds.news import BasicNewsRecipe
class nowyobywatel(BasicNewsRecipe):
    """Calibre news recipe for the magazine Nowy Obywatel (nowyobywatel.pl)."""

    # Spelling corrected to agree with the name given in __copyright__.
    __author__ = u'Łukasz Grąbczewski'
    title = u'Nowy Obywatel'
    description = u'Pismo na rzecz sprawiedliwości społecznej'
    language = 'pl'
    # Unicode literal, like every other non-ASCII string in this recipe;
    # without the prefix this would be a byte string under Python 2.
    publisher = u'Stowarzyszenie „Obywatele Obywatelom”'
    publication_type = 'magazine'
    masthead_url = 'http://lewicowo.pl/wp-content/uploads/2011/11/nowy-obywatel1.png'

    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = True
    remove_empty_feeds = True

    oldest_article = 32  # monthly + 1
    max_articles_per_feed = 100
    simultaneous_downloads = 20

    feeds = [
        (u'Aktualności', u'http://nowyobywatel.pl/category/aktualnosci/feed/'),
        (u'Opinie', u'http://nowyobywatel.pl/category/opinie/feed/'),
        (u'Nasze rozmowy', u'http://nowyobywatel.pl/category/nasze-rozmowy/feed/'),
        (u'Inspiracje', u'http://nowyobywatel.pl/category/inspiracje/feed/'),
    ]

    # Elements stripped from the article content, matched by class.
    remove_tags = [
        dict(attrs={'class': 'post-date'}),
        dict(attrs={'class': 'printfriendly'}),
        dict(attrs={'class': 'social4i'}),
    ]

View File

@ -1,7 +1,7 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Swiat_Obrazu(BasicNewsRecipe):
title = u'Swiat Obrazu'
title = u'Świat Obrazu'
__author__ = 'fenuks'
description = u'Internetowy Dziennik o Fotografii i Wideo www.SwiatObrazu.pl to źródło informacji o technice fotografii i wideo, o sprzęcie najbardziej znanych i uznanych firm: Canon, Nikon, Sony, Hasselblad i wielu innych. Znajdziecie tu programy do obróbki zdjęć, forum foto i forum wideo i galerie zdjęć. Codziennie najświeższe informacje: aktualności, testy, poradniki, wywiady, felietony. Swiatobrazu.pl stale organizuje konkursy oraz warsztaty fotograficzne i wideo.'
category = 'photography'

View File

@ -19,6 +19,7 @@ class swiatczytnikow(BasicNewsRecipe):
feeds = [(u'Świat Czytników - wpisy', u'http://swiatczytnikow.pl/feed')]
remove_tags = [dict(name = 'ul', attrs = {'class' : 'similar-posts'})]
remove_tags = [dict(name = 'ul', attrs = {'class' : 'similar-posts'}),
dict(name = 'div', attrs = {'class' : 'feedflare'})]
preprocess_regexps = [(re.compile(u'<h3>Czytaj dalej:</h3>'), lambda match: '')]

View File

@ -16,11 +16,31 @@ class telepolis(BasicNewsRecipe):
use_embedded_content = False
feeds = [
(u'Wiadomości', u'http://www.telepolis.pl/rss/news.php')#,
#(u'Artykuły', u'http://www.telepolis.pl/rss/artykuly.php')
(u'Wiadomości', u'http://www.telepolis.pl/rss,2,5,0.html')
]
keep_only_tags = [
dict(name='div', attrs={'class':'flol w510'}),
dict(name='div', attrs={'class':'main_tresc'}),
dict(name='div', attrs={'class':'main_tresc_news'})
]
def append_page(self, soup, appendtag):
    """Append the follow-up pages of a multi-page article to *appendtag*.

    Finds the first element with class ``str`` inside *appendtag* and walks
    its links. Each linked page is fetched with ``self.index_to_soup`` and
    its ``main_tresc`` element is appended to *appendtag*. The walk stops at
    the link whose content is ``Następna &rsaquo;``. Afterwards every
    ``str`` element is removed from *appendtag*.

    *appendtag* is modified in place; nothing is returned. *soup* is
    accepted but not used by this method.
    """
    pager = appendtag.find(attrs={'class': 'str'})
    if not pager:
        return
    for link in pager.findAll('a'):
        # Stop at the "next" link (presumably it points at a page that a
        # numbered link has already covered -- TODO confirm against the site).
        if link.renderContents() == 'Następna &rsaquo;':
            break
        page_soup = self.index_to_soup(link['href'])
        page_body = page_soup.find(attrs={'class': 'main_tresc'})
        if page_body is None:
            # The fetched page has no article container, so there is
            # nothing to append; skip it instead of inserting None.
            continue
        appendtag.insert(len(appendtag.contents), page_body)
    for pager_tag in appendtag.findAll(attrs={'class': 'str'}):
        pager_tag.extract()
def preprocess_html(self, soup):
    """Merge multi-page articles and rewrite ``m.jpg`` image URLs.

    Calls ``self.append_page`` on the document body, then replaces
    ``m.jpg`` with ``.jpg`` in the ``src`` of every ``<img>`` that contains
    it. Images without a ``src`` attribute are left untouched.

    Returns the same *soup* object, modified in place.
    """
    self.append_page(soup, soup.body)
    for image in soup.findAll('img'):
        # .get() avoids a KeyError on <img> tags that carry no src.
        src = image.get('src')
        if src and 'm.jpg' in src:
            image['src'] = src.replace('m.jpg', '.jpg')
    return soup

View File

@ -3,12 +3,13 @@
__license__ = 'GPL v3'
__copyright__ = u'Łukasz Grąbczewski 2012-2013'
__version__ = '1.1'
__version__ = '1.2'
'''
zw.com.pl
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class zyciewarszawy(BasicNewsRecipe):
@ -17,12 +18,14 @@ class zyciewarszawy(BasicNewsRecipe):
description = u'Wiadomości z Warszawy'
language = 'pl'
publisher = 'Presspublica'
publication_type = 'newspapper'
publication_type = 'newspaper'
masthead_url = 'http://www.zw.com.pl/static/img/logo_zw.gif'
no_stylesheets = True
remove_javascript = True
use_embedded_content = False
remove_empty_feeds = True
oldest_article = 1 #daily news only
oldest_article = 1.5 #last 36h
max_articles_per_feed = 100
feeds = [(u'Najnowsze', u'http://www.zw.com.pl/rss/1.html')]
@ -31,15 +34,15 @@ class zyciewarszawy(BasicNewsRecipe):
keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'storyp'}))
remove_tags = []
remove_tags.append(dict(name = 'div', attrs = {'class' : 'authordate'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'author'}))
'''remove_tags.append(dict(name = 'div', attrs = {'class' : 'seealso'}))'''
remove_tags.append(dict(name = 'div', attrs = {'class' : 'more'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'clr'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'adk_0'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'adsense_0'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'share_bottom'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'copyright_law'}))
remove_tags.append(dict(attrs = {'class' : 'author'}))
remove_tags.append(dict(attrs = {'class' : 'more'}))
remove_tags.append(dict(attrs = {'class' : 'clr'}))
remove_tags.append(dict(attrs = {'id' : 'adk_0'}))
remove_tags.append(dict(attrs = {'id' : 'adsense_0'}))
remove_tags.append(dict(attrs = {'id' : 'share_bottom'}))
remove_tags.append(dict(attrs = {'id' : 'copyright_law'}))
preprocess_regexps = [(re.compile(r',3.jpg'), lambda m: ',2.jpg')]
def print_version(self, url):
url += "?print=tak"