mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Various new and updated Polish news sources
This commit is contained in:
commit
bbbd6989a4
@ -14,6 +14,7 @@ class BenchmarkPl(BasicNewsRecipe):
|
|||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
extra_css = 'ul {list-style-type: none;}'
|
extra_css = 'ul {list-style-type: none;}'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
use_embedded_content = False
|
||||||
#remove_attributes = ['style']
|
#remove_attributes = ['style']
|
||||||
preprocess_regexps = [(re.compile(ur'<h3><span style="font-size: small;"> Zobacz poprzednie <a href="http://www.benchmark.pl/news/zestawienie/grupa_id/135">Opinie dnia:</a></span>.*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>'), (re.compile(ur'Więcej o .*?</ul>', re.DOTALL|re.IGNORECASE), lambda match: '')]
|
preprocess_regexps = [(re.compile(ur'<h3><span style="font-size: small;"> Zobacz poprzednie <a href="http://www.benchmark.pl/news/zestawienie/grupa_id/135">Opinie dnia:</a></span>.*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>'), (re.compile(ur'Więcej o .*?</ul>', re.DOTALL|re.IGNORECASE), lambda match: '')]
|
||||||
|
|
||||||
|
@ -23,8 +23,8 @@ class cdrinfo(BasicNewsRecipe):
|
|||||||
preprocess_regexps = [(re.compile(u'<p[^>]*?>Uprzejmie prosimy o przestrzeganie netykiety.+?www\.gravatar\.com</a>\.</p>', re.DOTALL), lambda match: '')]
|
preprocess_regexps = [(re.compile(u'<p[^>]*?>Uprzejmie prosimy o przestrzeganie netykiety.+?www\.gravatar\.com</a>\.</p>', re.DOTALL), lambda match: '')]
|
||||||
ignore_duplicate_articles = {'title', 'url'}
|
ignore_duplicate_articles = {'title', 'url'}
|
||||||
|
|
||||||
keep_only_tags = [dict(name='input', attrs={'name':'ref'}), dict(id='text')]
|
keep_only_tags = [dict(name='input', attrs={'name':'ref'}), dict(id=['text', 'text2'])]
|
||||||
remove_tags = [dict(attrs={'class':['navigation', 'sociable']}), dict(name='hr'), dict(id='respond')]
|
remove_tags = [dict(attrs={'class':['navigation', 'sociable', 'last6news']}), dict(name='hr'), dict(id='respond')]
|
||||||
remove_tags_after = dict(id='artnawigacja')
|
remove_tags_after = dict(id='artnawigacja')
|
||||||
feeds = [(u'Wiadomości', 'http://feeds.feedburner.com/cdrinfo'), (u'Recenzje', 'http://www.cdrinfo.pl/rss/rss_recenzje.php'),
|
feeds = [(u'Wiadomości', 'http://feeds.feedburner.com/cdrinfo'), (u'Recenzje', 'http://www.cdrinfo.pl/rss/rss_recenzje.php'),
|
||||||
(u'Konsole', 'http://konsole.cdrinfo.pl/rss/rss_konsole_news.xml'),
|
(u'Konsole', 'http://konsole.cdrinfo.pl/rss/rss_konsole_news.xml'),
|
||||||
|
@ -13,10 +13,11 @@ class Computerworld_pl(BasicNewsRecipe):
|
|||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
remove_attributes = ['style',]
|
remove_attributes = ['style',]
|
||||||
|
use_embedded_content = False
|
||||||
preprocess_regexps = [(re.compile(u'Zobacz również:', re.IGNORECASE), lambda m: ''), (re.compile(ur'[*]+reklama[*]+', re.IGNORECASE), lambda m: ''),]
|
preprocess_regexps = [(re.compile(u'Zobacz również:', re.IGNORECASE), lambda m: ''), (re.compile(ur'[*]+reklama[*]+', re.IGNORECASE), lambda m: ''),]
|
||||||
keep_only_tags = [dict(id=['szpaltaL', 's2011'])]
|
keep_only_tags = [dict(id=['article-default-body'])]
|
||||||
remove_tags_after = dict(name='div', attrs={'class':'tresc'})
|
remove_tags = [dict(attrs={'class':['share_tools nocontent', 'rec']}), dict(id=['topComment', 'bottom_tools'])]
|
||||||
remove_tags = [dict(attrs={'class':['nnav', 'rMobi', 'tagi', 'rec']}), dict(name='a', attrs={'target':'_blank'})]
|
|
||||||
feeds = [(u'Wiadomo\u015bci', u'http://rssout.idg.pl/cw/news_iso.xml')]
|
feeds = [(u'Wiadomo\u015bci', u'http://rssout.idg.pl/cw/news_iso.xml')]
|
||||||
|
|
||||||
def skip_ad_pages(self, soup):
|
def skip_ad_pages(self, soup):
|
||||||
|
@ -18,8 +18,8 @@ class Dobreprogramy_pl(BasicNewsRecipe):
|
|||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
remove_attrs = ['style', 'width', 'height']
|
remove_attrs = ['style', 'width', 'height']
|
||||||
preprocess_regexps = [(re.compile(ur'<div id="\S+360pmp4">Twoja przeglądarka nie obsługuje Flasha i HTML5 lub wyłączono obsługę JavaScript...</div>'), lambda match: '') ]
|
preprocess_regexps = [(re.compile(ur'<div id="\S+360pmp4">Twoja przeglądarka nie obsługuje Flasha i HTML5 lub wyłączono obsługę JavaScript...</div>'), lambda match: '') ]
|
||||||
keep_only_tags=[dict(attrs={'class':['news', 'entry single']})]
|
keep_only_tags = [dict(attrs={'class':['entry single']}), dict(id='phContent_divArticle')]
|
||||||
remove_tags = [dict(attrs={'class':['newsOptions', 'noPrint', 'komentarze', 'tags font-heading-master']}), dict(id='komentarze'), dict(name='iframe')]
|
remove_tags = [dict(attrs={'class':['newsOptions', 'noPrint', 'komentarze', 'tags font-heading-master', 'social nested-grid grid-margin-px15-top clearfix no-mobile', 'page-info text-h4 font-heading grid-margin-px15-top color-annotation clearfix']}), dict(id='komentarze'), dict(name='iframe')]
|
||||||
#remove_tags = [dict(name='div', attrs={'class':['komentarze', 'block', 'portalInfo', 'menuBar', 'topBar']})]
|
#remove_tags = [dict(name='div', attrs={'class':['komentarze', 'block', 'portalInfo', 'menuBar', 'topBar']})]
|
||||||
feeds = [(u'Aktualności', 'http://feeds.feedburner.com/dobreprogramy/Aktualnosci'),
|
feeds = [(u'Aktualności', 'http://feeds.feedburner.com/dobreprogramy/Aktualnosci'),
|
||||||
('Blogi', 'http://feeds.feedburner.com/dobreprogramy/BlogCzytelnikow')]
|
('Blogi', 'http://feeds.feedburner.com/dobreprogramy/BlogCzytelnikow')]
|
||||||
|
@ -9,6 +9,7 @@ class ForsalPL(BasicNewsRecipe):
|
|||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
remove_empty_feeds = True
|
||||||
ignore_duplicate_articles = {'title', 'url'}
|
ignore_duplicate_articles = {'title', 'url'}
|
||||||
cover_url = 'http://www.bizneswnieruchomosciach.pl/wp-content/uploads/2010/07/logo_forsal.jpg'
|
cover_url = 'http://www.bizneswnieruchomosciach.pl/wp-content/uploads/2010/07/logo_forsal.jpg'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
@ -11,6 +11,7 @@ class INFRA(BasicNewsRecipe):
|
|||||||
index='http://infra.org.pl'
|
index='http://infra.org.pl'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
|
remove_empty_feeds = True
|
||||||
remove_attrs = ['style']
|
remove_attrs = ['style']
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
keep_only_tags = [dict(id='ja-current-content')]
|
keep_only_tags = [dict(id='ja-current-content')]
|
||||||
|
@ -13,5 +13,8 @@ class KDEFamilyPl(BasicNewsRecipe):
|
|||||||
preprocess_regexps = [(re.compile(r"Podobne wpisy.*", re.IGNORECASE|re.DOTALL), lambda m: '')]
|
preprocess_regexps = [(re.compile(r"Podobne wpisy.*", re.IGNORECASE|re.DOTALL), lambda m: '')]
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
use_embedded_content = True
|
use_embedded_content = False
|
||||||
|
keep_only_tags = [dict(attrs={'class':'blog-post'})]
|
||||||
|
remove_tags = [dict(attrs={'class':['blog-bottom', 'ratings hreview-aggregate']})]
|
||||||
|
|
||||||
feeds = [(u'Wszystko', u'http://kdefamily.pl/feed/')]
|
feeds = [(u'Wszystko', u'http://kdefamily.pl/feed/')]
|
@ -8,7 +8,7 @@ class Kosmonauta(BasicNewsRecipe):
|
|||||||
category = 'astronomy'
|
category = 'astronomy'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
cover_url = 'http://bi.gazeta.pl/im/4/10393/z10393414X,Kosmonauta-net.jpg'
|
cover_url = 'http://bi.gazeta.pl/im/4/10393/z10393414X,Kosmonauta-net.jpg'
|
||||||
extra_css = '.thumbnail {float:left;margin-right:5px;}'
|
extra_css = '.thumb-left {float:left; margin-right:5px;} .calibre_navbar {clear: both;}'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
INDEX = 'http://www.kosmonauta.net'
|
INDEX = 'http://www.kosmonauta.net'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class Lomza(BasicNewsRecipe):
|
class Lomza(BasicNewsRecipe):
|
||||||
title = u'4Lomza'
|
title = u'4Łomza'
|
||||||
__author__ = 'fenuks'
|
__author__ = 'fenuks'
|
||||||
description = u'Regionalny portal. Najświeższe informacje z regionu, kulturalne, sportowe. Ogłoszenia, baza biznesu, forum.'
|
description = u'Regionalny portal. Najświeższe informacje z regionu, kulturalne, sportowe. Ogłoszenia, baza biznesu, forum.'
|
||||||
cover_url = 'http://www.4lomza.pl/i/logo4lomza_m.jpg'
|
cover_url = 'http://www.4lomza.pl/i/logo4lomza_m.jpg'
|
||||||
|
@ -10,7 +10,7 @@ class recipeMagic(BasicNewsRecipe):
|
|||||||
title = 'National Geographic PL'
|
title = 'National Geographic PL'
|
||||||
__author__ = 'Marcin Urban 2011'
|
__author__ = 'Marcin Urban 2011'
|
||||||
__modified_by__ = 'fenuks'
|
__modified_by__ = 'fenuks'
|
||||||
description = 'legenda wśród magazynów z historią sięgającą 120 lat'
|
description = u'Legenda wśród magazynów z historią sięgającą 120 lat'
|
||||||
#cover_url = 'http://www.guj.pl/var/guj/storage/images/media/nasze_magazyny/national_geographic/logo/ng_logo/2606-1-pol-PL/ng_logo.jpg'
|
#cover_url = 'http://www.guj.pl/var/guj/storage/images/media/nasze_magazyny/national_geographic/logo/ng_logo/2606-1-pol-PL/ng_logo.jpg'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
@ -21,6 +21,7 @@ class recipeMagic(BasicNewsRecipe):
|
|||||||
publisher = 'G+J Gruner+Jahr Polska'
|
publisher = 'G+J Gruner+Jahr Polska'
|
||||||
category = 'news, PL,'
|
category = 'news, PL,'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
|
remove_empty_feeds = True
|
||||||
publication_type = 'newsportal'
|
publication_type = 'newsportal'
|
||||||
extra_css = ''' body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
|
extra_css = ''' body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
|
||||||
h1{text-align: center;}
|
h1{text-align: center;}
|
||||||
@ -47,14 +48,16 @@ class recipeMagic(BasicNewsRecipe):
|
|||||||
|
|
||||||
def find_articles(self, url):
|
def find_articles(self, url):
|
||||||
articles = []
|
articles = []
|
||||||
soup=self.index_to_soup(url)
|
soup = self.index_to_soup(url)
|
||||||
tag=soup.find(attrs={'class':'arl'})
|
tag = soup.find(attrs={'class':'arl'})
|
||||||
art=tag.ul.findAll('li')
|
if not tag:
|
||||||
|
return articles
|
||||||
|
art = tag.ul.findAll('li')
|
||||||
for i in art:
|
for i in art:
|
||||||
title=i.a['title']
|
title = i.a['title']
|
||||||
url=i.a['href']
|
url = i.a['href']
|
||||||
#date=soup.find(id='footer').ul.li.string[41:-1]
|
#date=soup.find(id='footer').ul.li.string[41:-1]
|
||||||
desc=i.div.p.string
|
desc = i.div.p.string
|
||||||
articles.append({'title' : title,
|
articles.append({'title' : title,
|
||||||
'url' : url,
|
'url' : url,
|
||||||
'date' : '',
|
'date' : '',
|
||||||
|
102
recipes/odkrywcy_pl.recipe
Normal file
102
recipes/odkrywcy_pl.recipe
Normal file
@ -0,0 +1,102 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
import re
|
||||||
|
import datetime
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import Comment
|
||||||
|
|
||||||
|
class Odkrywcy(BasicNewsRecipe):
    """Calibre news recipe for Odkrywcy.pl.

    The article index is built by scraping the category listing pages
    (see ``parse_index`` / ``find_articles``) instead of reading RSS feeds.
    Articles split over several pages are merged into one document by
    ``append_page``, which is invoked from ``preprocess_html``.
    """

    title = u'Odkrywcy.pl'
    __author__ = 'fenuks'
    description = u''
    language = 'pl'
    extra_css = 'img {display: block;}'
    cover_url = ''
    # Site root; listing and pagination links are relative to it.
    INDEX = 'http://odkrywcy.pl'
    use_embedded_content = False
    # Maximum article age in days; enforced manually in find_articles().
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    remove_javascript = True
    remove_attributes = ['style', 'font']
    ignore_duplicate_articles = {'title', 'url'}

    keep_only_tags = [dict(attrs={'class':'content'})]
    remove_tags = [dict(name='a', attrs={'href':['#opOpinie', '#opinie']}), dict(attrs={'class':['fr', 'clra', 'close', 'wpsocial-fbFanpageBox', 'tagi', 'test']}), dict(id=['rekSrd05', 'moreTopNews']), dict(name='img', attrs={'class':'zr'}), dict(name='img', attrs={'alt':u'Następne'})]
    remove_tags_after = dict(id='aTxt')
    # Placeholder kept from the original recipe; presumably unused because
    # parse_index() is overridden below -- TODO confirm against calibre.
    feeds = [(u'', '')]

    # Publication date as printed in a listing entry, e.g. "dodano: 2013-05-20".
    _DATE_RE = re.compile(r'dodano: (\d{4}-\d{2}-\d{2})')

    # (section title, category listing URL) pairs, in output order.
    _CATEGORIES = (
        (u'Człowiek', 'http://odkrywcy.pl/kat,111396,name,Czlowiek,kategoria.html'),
        (u'Technologie', 'http://odkrywcy.pl/kat,111398,name,Technologie,kategoria.html'),
        (u'Ekologia', 'http://odkrywcy.pl/kat,111400,name,Ekologia,kategoria.html'),
        (u'Kosmos', 'http://odkrywcy.pl/kat,111402,name,Kosmos,kategoria.html'),
        (u'Cywilizacja', 'http://odkrywcy.pl/kat,111404,name,Cywilizacja,kategoria.html'),
        (u'Przyroda', 'http://odkrywcy.pl/kat,111406,name,Przyroda,kategoria.html'),
        (u'Fizyka i chemia', 'http://odkrywcy.pl/kat,111408,name,Fizyka,kategoria.html'),
        (u'Historia', 'http://odkrywcy.pl/kat,122994,name,Historia,kategoria.html'),
        (u'Media', 'http://odkrywcy.pl/kat,116794,name,Media,media.html'),
    )

    # Attribute filters for elements stripped from the merged article once
    # all pages are appended. Unicode literals are used for the Polish alt
    # texts so they agree with the u'Następne' filter in remove_tags.
    _MERGED_PAGE_JUNK = (
        {'class': 'galStr'},
        {'alt': u'Następne'},
        {'alt': u'Poprzednie'},
        {'class': 'clra'},
        {'class': 'close'},
        {'class': 'tagi'},
        {'id': 'moreTopNews'},
    )

    def find_articles(self, url):
        """Return the recent articles listed on one category page.

        :param url: absolute URL of a category listing page.
        :return: list of article dicts with the keys ``title``, ``url``,
            ``date`` and ``description`` (the last two are always empty).

        Entries older than ``oldest_article`` days are dropped. Entries
        whose date or link cannot be parsed are skipped instead of
        aborting the whole download.
        """
        articles = []
        soup = self.index_to_soup(url)
        today = datetime.datetime.now().date()
        for entry in soup.findAll(attrs={'class':'katZj clra'}):
            small = entry.find('small')
            link = entry.find('a')
            if small is None or link is None or not link.get('href'):
                continue
            # tag_to_string() copes with nested markup, where .string is None.
            match = self._DATE_RE.search(self.tag_to_string(small))
            if match is None:
                continue
            try:
                published = datetime.datetime.strptime(match.group(1), '%Y-%m-%d').date()
            except ValueError:
                # Digits matched the pattern but do not form a real date.
                continue
            if (today - published).days > self.oldest_article:
                continue
            articles.append({'title' : self.tag_to_string(link),
                             'url' : self.INDEX + link['href'],
                             'date' : '',
                             'description' : ''
                             })
        return articles

    def parse_index(self):
        """Build the index: one ``(section title, articles)`` pair per category."""
        return [(name, self.find_articles(url)) for name, url in self._CATEGORIES]

    def append_page(self, soup, appendtag):
        """Append the following pages of a multi-page article to *appendtag*.

        :param soup: parsed first page of the article.
        :param appendtag: tag that receives the content of later pages.

        Follows the ``btnNext`` links until none is left or a link repeats,
        moves every ``content`` container of each fetched page (minus its
        ``h1`` and HTML comments) to the end of *appendtag*, and finally
        strips navigation leftovers from the merged result.
        """
        if appendtag is None:
            return
        next_link = soup.find('a', attrs={'class': 'btnNext'})
        visited = set()
        while next_link is not None:
            href = next_link.get('href')
            # A missing or already visited target would loop forever.
            if not href or href in visited:
                break
            visited.add(href)
            page = self.index_to_soup(self.INDEX + href)
            next_link = page.find(name='a', attrs={'class': 'btnNext'})
            for container in page.findAll(attrs={'class':'content'}):
                header = container.find(name='h1')
                if header is not None:
                    # The headline is already present from the first page.
                    header.extract()
                for comment in container.findAll(text=lambda text: isinstance(text, Comment)):
                    comment.extract()
                appendtag.insert(len(appendtag.contents), container)
        for attrs in self._MERGED_PAGE_JUNK:
            for node in appendtag.findAll(attrs=attrs):
                node.extract()

    def preprocess_html(self, soup):
        """Merge follow-up pages into the article body and return the soup."""
        self.append_page(soup, soup.body)
        return soup
|
@ -2,7 +2,7 @@ __license__ = 'GPL v3'
|
|||||||
import re
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class Poltergeist(BasicNewsRecipe):
|
class Polter(BasicNewsRecipe):
|
||||||
title = u'Polter.pl'
|
title = u'Polter.pl'
|
||||||
__author__ = 'fenuks'
|
__author__ = 'fenuks'
|
||||||
description = u'Największy polski serwis poświęcony ogólno pojętej fantastyce - grom fabularnym (RPG), książkom, filmowi, komiksowi, grom planszowym, karcianym i bitewnym.'
|
description = u'Największy polski serwis poświęcony ogólno pojętej fantastyce - grom fabularnym (RPG), książkom, filmowi, komiksowi, grom planszowym, karcianym i bitewnym.'
|
||||||
@ -10,23 +10,21 @@ class Poltergeist(BasicNewsRecipe):
|
|||||||
#publication_type = ''
|
#publication_type = ''
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
#encoding = ''
|
#encoding = ''
|
||||||
extra_css = '.image, .floatright {float: right; margin-left: 10px;} .floatleft {float: left; margin-right: 10px;}'
|
extra_css = '.image, .floatright {float: right; margin-left: 10px;} .floatleft {float: left; margin-right: 10px;} .calibre_navbar {clear: both;} .p_title {font-weight: bold;} .p_image {margin-left: auto; margin-right: auto; display: block;} .italic {font-style: italic;}'
|
||||||
cover_url = 'http://static.polter.pl/sub/promo/bpromo2524.jpg'
|
cover_url = 'http://static.polter.pl/sub/promo/bpromo2524.jpg'
|
||||||
#masthead_url = ''
|
#masthead_url = ''
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
preprocess_regexps = [(re.compile(ur'<div[^>]*?id="pol_lista"[^>]*?>.*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>'), (re.compile(ur'<a[^>]*?>wersja do druku</a>', re.DOTALL|re.IGNORECASE), lambda match: '')]
|
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
remove_attributes = ['style', 'font']
|
remove_attributes = ['font', 'fieldset', 'onclick']
|
||||||
ignore_duplicate_articles = {'title', 'url'}
|
ignore_duplicate_articles = {'title', 'url'}
|
||||||
|
|
||||||
keep_only_tags = [dict(attrs={'class':'boxcontent'})]
|
keep_only_tags = [dict(attrs={'class':'boxcontent'})]
|
||||||
remove_tags = [dict(attrs={'class':'fb-like'}), dict(attrs={'alt':'Wersja do druku'}), dict(id='pol_liczba'), dict(attrs={'scr':'http://static.polter.pl/tplimg/buttons/ceneo_140_40.gif'})]
|
remove_tags = [dict(id='komentarze')]
|
||||||
remove_tags_after = dict(attrs={'class':'fb-like'})
|
remove_tags_after = dict(id='komentarze')
|
||||||
#remove_tags_before = dict()
|
|
||||||
|
|
||||||
feeds = [(u'Wieści', 'http://polter.pl/wiesci,rss.html'), (u'RPG', 'http://rpg.polter.pl/wiesci,rss.html'), (u'Książki', 'http://ksiazki.polter.pl/wiesci,rss.html'), (u'Film', 'http://film.polter.pl/wiesci,rss.html'), (u'Komiks', 'http://komiks.polter.pl/wiesci,rss.html'), (u'Gry bitewne', 'http://bitewniaki.polter.pl/wiesci,rss.html'), (u'Gry karciane', 'http://karcianki.polter.pl/wiesci,rss.html'), (u'Gry planszowe', 'http://planszowki.polter.pl/wiesci,rss.html'), (u'Gry PC', 'http://gry.polter.pl/wiesci,rss.html'), (u'Gry konsolowe', 'http://konsole.polter.pl/wiesci,rss.html'), (u'Konwenty', 'http://konwenty.polter.pl/wiesci,rss.html'), (u'Blogi', 'http://polter.pl/blogi,rss.html')]
|
feeds = [(u'Wieści', 'http://polter.pl/wiesci,rss.html'), (u'RPG', 'http://rpg.polter.pl/wiesci,rss.html'), (u'Książki', 'http://ksiazki.polter.pl/wiesci,rss.html'), (u'Film', 'http://film.polter.pl/wiesci,rss.html'), (u'Komiks', 'http://komiks.polter.pl/wiesci,rss.html'), (u'Gry bitewne', 'http://bitewniaki.polter.pl/wiesci,rss.html'), (u'Gry karciane', 'http://karcianki.polter.pl/wiesci,rss.html'), (u'Gry planszowe', 'http://planszowki.polter.pl/wiesci,rss.html'), (u'Gry PC', 'http://gry.polter.pl/wiesci,rss.html'), (u'Gry konsolowe', 'http://konsole.polter.pl/wiesci,rss.html'), (u'Konwenty', 'http://konwenty.polter.pl/wiesci,rss.html'), (u'Blogi', 'http://polter.pl/blogi,rss.html')]
|
||||||
|
|
||||||
@ -35,9 +33,24 @@ class Poltergeist(BasicNewsRecipe):
|
|||||||
s['class'] = 'floatleft'
|
s['class'] = 'floatleft'
|
||||||
for s in soup.findAll(attrs={'style':re.compile('float: ?right')}):
|
for s in soup.findAll(attrs={'style':re.compile('float: ?right')}):
|
||||||
s['class'] = 'floatright'
|
s['class'] = 'floatright'
|
||||||
|
for s in soup.findAll(style=True):
|
||||||
|
if 'bold;' in s['style']:
|
||||||
|
if s.get('class', ''):
|
||||||
|
s['class'] = s['class'] + ' p_title'
|
||||||
|
else:
|
||||||
|
s['class'] = 'p_title'
|
||||||
|
if 'italic;' in s['style']:
|
||||||
|
if s.get('class', ''):
|
||||||
|
s['class'] = s['class'] + ' italic'
|
||||||
|
else:
|
||||||
|
s['class'] = 'italic'
|
||||||
|
del s['style']
|
||||||
|
|
||||||
tag = soup.find(id='twoja_ocena')
|
tag = soup.find(id='twoja_ocena')
|
||||||
if tag:
|
if tag:
|
||||||
tag.parent.extract()
|
tag.parent.extract()
|
||||||
for tag in soup.findAll(id='lista_chce_ile'):
|
for tag in soup.findAll(id='lista_chce_ile'):
|
||||||
tag.parent.parent.extract()
|
tag.parent.parent.extract()
|
||||||
|
for r in soup.findAll(name='a', href=re.compile(r'^http://www.ceneo.pl/')):
|
||||||
|
r.extract()
|
||||||
return soup
|
return soup
|
File diff suppressed because one or more lines are too long
@ -14,7 +14,5 @@ class Tablety_pl(BasicNewsRecipe):
|
|||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
preprocess_regexps = [(re.compile(ur'<p><strong>Przeczytaj także.*?</a></strong></p>', re.DOTALL), lambda match: ''), (re.compile(ur'<p><strong>Przeczytaj koniecznie.*?</a></strong></p>', re.DOTALL), lambda match: '')]
|
preprocess_regexps = [(re.compile(ur'<p><strong>Przeczytaj także.*?</a></strong></p>', re.DOTALL), lambda match: ''), (re.compile(ur'<p><strong>Przeczytaj koniecznie.*?</a></strong></p>', re.DOTALL), lambda match: '')]
|
||||||
keep_only_tags = [dict(id='news_block')]
|
keep_only_tags = [dict(id='news_block')]
|
||||||
#remove_tags_before=dict(name="h1", attrs={'class':'entry-title'})
|
remove_tags=[dict(attrs={'class':['comments_icon', 'wp-polls', 'entry-comments', 'wp-polls-loading', 'ts-fab-wrapper', 'entry-footer']})]
|
||||||
#remove_tags_after=dict(name="footer", attrs={'class':'entry-footer clearfix'})
|
|
||||||
remove_tags=[dict(attrs={'class':['comments_icon', 'wp-polls', 'entry-comments']})]
|
|
||||||
feeds = [(u'Najnowsze posty', u'http://www.tablety.pl/feed/')]
|
feeds = [(u'Najnowsze posty', u'http://www.tablety.pl/feed/')]
|
@ -16,33 +16,11 @@ class Wprost(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
recursions = 0
|
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
ignore_duplicate_articles = {'title', 'url'}
|
ignore_duplicate_articles = {'title', 'url'}
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
remove_tags_before = dict(dict(name = 'div', attrs = {'id' : 'print-layer'}))
|
keep_only_tags = [dict(attrs={'class':'art-area'})]
|
||||||
remove_tags_after = dict(dict(name = 'div', attrs = {'id' : 'print-layer'}))
|
remove_tags = [dict(attrs={'class':'add300x250'})]
|
||||||
'''
|
|
||||||
keep_only_tags =[]
|
|
||||||
keep_only_tags.append(dict(name = 'table', attrs = {'id' : 'title-table'}))
|
|
||||||
keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-header'}))
|
|
||||||
keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-content'}))
|
|
||||||
keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'def element-autor'}))
|
|
||||||
'''
|
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(r'style="display: none;"'), lambda match: ''),
|
|
||||||
(re.compile(r'display: block;'), lambda match: ''),
|
|
||||||
(re.compile(r'\<td\>\<tr\>\<\/table\>'), lambda match: ''),
|
|
||||||
(re.compile(r'\<table .*?\>'), lambda match: ''),
|
|
||||||
(re.compile(r'\<tr>'), lambda match: ''),
|
|
||||||
(re.compile(r'\<td .*?\>'), lambda match: ''),
|
|
||||||
(re.compile(r'\<div id="footer"\>.*?\</footer\>'), lambda match: '')]
|
|
||||||
|
|
||||||
remove_tags =[]
|
|
||||||
remove_tags.append(dict(name = 'div', attrs = {'class' : 'def element-date'}))
|
|
||||||
remove_tags.append(dict(name = 'div', attrs = {'class' : 'def silver'}))
|
|
||||||
remove_tags.append(dict(name = 'div', attrs = {'id' : 'content-main-column-right'}))
|
|
||||||
|
|
||||||
|
|
||||||
extra_css = '''.div-header {font-size: x-small; font-weight: bold}'''
|
extra_css = '''.div-header {font-size: x-small; font-weight: bold}'''
|
||||||
#h2 {font-size: x-large; font-weight: bold}
|
#h2 {font-size: x-large; font-weight: bold}
|
||||||
|
@ -13,6 +13,7 @@ class ZTS(BasicNewsRecipe):
|
|||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
|
use_embedded_content = False
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':'post postcontent'})]
|
keep_only_tags = [dict(name='div', attrs={'class':'post postcontent'})]
|
||||||
remove_tags = [dict(name='div', attrs={'class':'dolna-ramka'})]
|
remove_tags = [dict(name='div', attrs={'class':'dolna-ramka'})]
|
||||||
feeds = [(u'Strona g\u0142\xf3wna', u'http://feeds.feedburner.com/ZaufanaTrzeciaStronaGlowna'), (u'Drobiazgi', u'http://feeds.feedburner.com/ZaufanaTrzeciaStronaDrobiazgi')]
|
feeds = [(u'Strona g\u0142\xf3wna', u'http://feeds.feedburner.com/ZaufanaTrzeciaStronaGlowna'), (u'Drobiazgi', u'http://feeds.feedburner.com/ZaufanaTrzeciaStronaDrobiazgi')]
|
||||||
|
27
recipes/znadplanszy_pl.recipe
Normal file
27
recipes/znadplanszy_pl.recipe
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class ZnadPlanszy(BasicNewsRecipe):
    """Calibre news recipe for ZnadPlanszy.pl, driven by a single feed."""

    # --- metadata ---
    title = u'ZnadPlanszy.pl'
    __author__ = 'fenuks'
    description = u''
    language = 'pl'
    cover_url = 'http://znadplanszy.pl/wp-content/uploads/2013/05/logo-znadplanszy.png'

    # --- download settings ---
    feeds = [
        (u'Wszystkie', 'http://znadplanszy.pl/full-feed/posts/'),
    ]
    oldest_article = 14
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_empty_feeds = True
    ignore_duplicate_articles = {'title', 'url'}

    # --- page clean-up ---
    no_stylesheets = True
    remove_javascript = True
    remove_attributes = ['style', 'font']
    remove_tags_before = {'attrs': {'class': 'content units nine alpha'}}
    remove_tags_after = {'attrs': {'id': 'dotEPUBcontent'}}
    remove_tags = [
        {'attrs': {'class': 'rounded-container'}},
    ]
|
Loading…
x
Reference in New Issue
Block a user