mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Updated Polish recipes
This commit is contained in:
parent
0ac211b150
commit
d7a5118c42
@@ -14,6 +14,7 @@ class BenchmarkPl(BasicNewsRecipe):
|
||||
max_articles_per_feed = 100
|
||||
extra_css = 'ul {list-style-type: none;}'
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
#remove_attributes = ['style']
|
||||
preprocess_regexps = [(re.compile(ur'<h3><span style="font-size: small;"> Zobacz poprzednie <a href="http://www.benchmark.pl/news/zestawienie/grupa_id/135">Opinie dnia:</a></span>.*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>'), (re.compile(ur'Więcej o .*?</ul>', re.DOTALL|re.IGNORECASE), lambda match: '')]
|
||||
|
||||
|
@@ -23,8 +23,8 @@ class cdrinfo(BasicNewsRecipe):
|
||||
preprocess_regexps = [(re.compile(u'<p[^>]*?>Uprzejmie prosimy o przestrzeganie netykiety.+?www\.gravatar\.com</a>\.</p>', re.DOTALL), lambda match: '')]
|
||||
ignore_duplicate_articles = {'title', 'url'}
|
||||
|
||||
keep_only_tags = [dict(name='input', attrs={'name':'ref'}), dict(id='text')]
|
||||
remove_tags = [dict(attrs={'class':['navigation', 'sociable']}), dict(name='hr'), dict(id='respond')]
|
||||
keep_only_tags = [dict(name='input', attrs={'name':'ref'}), dict(id=['text', 'text2'])]
|
||||
remove_tags = [dict(attrs={'class':['navigation', 'sociable', 'last6news']}), dict(name='hr'), dict(id='respond')]
|
||||
remove_tags_after = dict(id='artnawigacja')
|
||||
feeds = [(u'Wiadomości', 'http://feeds.feedburner.com/cdrinfo'), (u'Recenzje', 'http://www.cdrinfo.pl/rss/rss_recenzje.php'),
|
||||
(u'Konsole', 'http://konsole.cdrinfo.pl/rss/rss_konsole_news.xml'),
|
||||
|
@@ -13,10 +13,11 @@ class Computerworld_pl(BasicNewsRecipe):
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
remove_attributes = ['style',]
|
||||
use_embedded_content = False
|
||||
preprocess_regexps = [(re.compile(u'Zobacz również:', re.IGNORECASE), lambda m: ''), (re.compile(ur'[*]+reklama[*]+', re.IGNORECASE), lambda m: ''),]
|
||||
keep_only_tags = [dict(id=['szpaltaL', 's2011'])]
|
||||
remove_tags_after = dict(name='div', attrs={'class':'tresc'})
|
||||
remove_tags = [dict(attrs={'class':['nnav', 'rMobi', 'tagi', 'rec']}), dict(name='a', attrs={'target':'_blank'})]
|
||||
keep_only_tags = [dict(id=['article-default-body'])]
|
||||
remove_tags = [dict(attrs={'class':['share_tools nocontent', 'rec']}), dict(id=['topComment', 'bottom_tools'])]
|
||||
|
||||
feeds = [(u'Wiadomo\u015bci', u'http://rssout.idg.pl/cw/news_iso.xml')]
|
||||
|
||||
def skip_ad_pages(self, soup):
|
||||
|
@@ -18,8 +18,8 @@ class Dobreprogramy_pl(BasicNewsRecipe):
|
||||
max_articles_per_feed = 100
|
||||
remove_attrs = ['style', 'width', 'height']
|
||||
preprocess_regexps = [(re.compile(ur'<div id="\S+360pmp4">Twoja przeglądarka nie obsługuje Flasha i HTML5 lub wyłączono obsługę JavaScript...</div>'), lambda match: '') ]
|
||||
keep_only_tags=[dict(attrs={'class':['news', 'entry single']})]
|
||||
remove_tags = [dict(attrs={'class':['newsOptions', 'noPrint', 'komentarze', 'tags font-heading-master']}), dict(id='komentarze'), dict(name='iframe')]
|
||||
keep_only_tags = [dict(attrs={'class':['entry single']}), dict(id='phContent_divArticle')]
|
||||
remove_tags = [dict(attrs={'class':['newsOptions', 'noPrint', 'komentarze', 'tags font-heading-master', 'social nested-grid grid-margin-px15-top clearfix no-mobile', 'page-info text-h4 font-heading grid-margin-px15-top color-annotation clearfix']}), dict(id='komentarze'), dict(name='iframe')]
|
||||
#remove_tags = [dict(name='div', attrs={'class':['komentarze', 'block', 'portalInfo', 'menuBar', 'topBar']})]
|
||||
feeds = [(u'Aktualności', 'http://feeds.feedburner.com/dobreprogramy/Aktualnosci'),
|
||||
('Blogi', 'http://feeds.feedburner.com/dobreprogramy/BlogCzytelnikow')]
|
||||
|
@@ -11,6 +11,7 @@ class INFRA(BasicNewsRecipe):
|
||||
index='http://infra.org.pl'
|
||||
language = 'pl'
|
||||
max_articles_per_feed = 100
|
||||
remove_empty_feeds = True
|
||||
remove_attrs = ['style']
|
||||
no_stylesheets = True
|
||||
keep_only_tags = [dict(id='ja-current-content')]
|
||||
|
@@ -13,5 +13,8 @@ class KDEFamilyPl(BasicNewsRecipe):
|
||||
preprocess_regexps = [(re.compile(r"Podobne wpisy.*", re.IGNORECASE|re.DOTALL), lambda m: '')]
|
||||
no_stylesheets = True
|
||||
remove_empty_feeds = True
|
||||
use_embedded_content = True
|
||||
use_embedded_content = False
|
||||
keep_only_tags = [dict(attrs={'class':'blog-post'})]
|
||||
remove_tags = [dict(attrs={'class':['blog-bottom', 'ratings hreview-aggregate']})]
|
||||
|
||||
feeds = [(u'Wszystko', u'http://kdefamily.pl/feed/')]
|
@@ -1,7 +1,7 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Lomza(BasicNewsRecipe):
|
||||
title = u'4Lomza'
|
||||
title = u'4Łomza'
|
||||
__author__ = 'fenuks'
|
||||
description = u'Regionalny portal. Najświeższe informacje z regionu, kulturalne, sportowe. Ogłoszenia, baza biznesu, forum.'
|
||||
cover_url = 'http://www.4lomza.pl/i/logo4lomza_m.jpg'
|
||||
|
@@ -10,7 +10,7 @@ class recipeMagic(BasicNewsRecipe):
|
||||
title = 'National Geographic PL'
|
||||
__author__ = 'Marcin Urban 2011'
|
||||
__modified_by__ = 'fenuks'
|
||||
description = 'Legenda wśród magazynów z historią sięgającą 120 lat'
|
||||
description = u'Legenda wśród magazynów z historią sięgającą 120 lat'
|
||||
#cover_url = 'http://www.guj.pl/var/guj/storage/images/media/nasze_magazyny/national_geographic/logo/ng_logo/2606-1-pol-PL/ng_logo.jpg'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
|
@@ -15,12 +15,6 @@ class Polter(BasicNewsRecipe):
|
||||
#masthead_url = ''
|
||||
use_embedded_content = False
|
||||
oldest_article = 7
|
||||
preprocess_regexps = [(re.compile(ur'<div[^>]*?id="pol_lista"[^>]*?>.*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>'),
|
||||
(re.compile(ur'<a[^>]*?>wersja do druku</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),
|
||||
#(re.compile(ur'<a href="JavaScript:[^">]*">(<img ?[^>]*?/>)</a>', re.DOTALL|re.IGNORECASE), lambda match: '/1')
|
||||
(re.compile(ur'(<br ?/?>[\n\s\r]*){2,}', re.DOTALL|re.IGNORECASE), lambda match: '<br />'),
|
||||
(re.compile(ur'<span[^>]*>Zaloguj się aby wyłączyć tę reklamę</span>', re.DOTALL|re.IGNORECASE), lambda match: ''),
|
||||
]
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
remove_empty_feeds = True
|
||||
@@ -29,9 +23,8 @@ class Polter(BasicNewsRecipe):
|
||||
ignore_duplicate_articles = {'title', 'url'}
|
||||
|
||||
keep_only_tags = [dict(attrs={'class':'boxcontent'})]
|
||||
remove_tags = [dict(attrs={'class':'fb-like'}), dict(attrs={'alt':'Wersja do druku'}), dict(id=['pol_liczba', 'col12AdSenseLight']), dict(attrs={'scr':'http://static.polter.pl/tplimg/buttons/ceneo_140_40.gif'}), dict(name=['g:plusone', 'fb:like'])]
|
||||
remove_tags_after = dict(attrs={'class':'fb-like'})
|
||||
#remove_tags_before = dict()
|
||||
remove_tags = [dict(id='komentarze')]
|
||||
remove_tags_after = dict(id='komentarze')
|
||||
|
||||
feeds = [(u'Wieści', 'http://polter.pl/wiesci,rss.html'), (u'RPG', 'http://rpg.polter.pl/wiesci,rss.html'), (u'Książki', 'http://ksiazki.polter.pl/wiesci,rss.html'), (u'Film', 'http://film.polter.pl/wiesci,rss.html'), (u'Komiks', 'http://komiks.polter.pl/wiesci,rss.html'), (u'Gry bitewne', 'http://bitewniaki.polter.pl/wiesci,rss.html'), (u'Gry karciane', 'http://karcianki.polter.pl/wiesci,rss.html'), (u'Gry planszowe', 'http://planszowki.polter.pl/wiesci,rss.html'), (u'Gry PC', 'http://gry.polter.pl/wiesci,rss.html'), (u'Gry konsolowe', 'http://konsole.polter.pl/wiesci,rss.html'), (u'Konwenty', 'http://konwenty.polter.pl/wiesci,rss.html'), (u'Blogi', 'http://polter.pl/blogi,rss.html')]
|
||||
|
||||
|
@@ -16,33 +16,11 @@ class Wprost(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
language = 'pl'
|
||||
remove_javascript = True
|
||||
recursions = 0
|
||||
use_embedded_content = False
|
||||
ignore_duplicate_articles = {'title', 'url'}
|
||||
remove_empty_feeds = True
|
||||
remove_tags_before = dict(dict(name = 'div', attrs = {'id' : 'print-layer'}))
|
||||
remove_tags_after = dict(dict(name = 'div', attrs = {'id' : 'print-layer'}))
|
||||
'''
|
||||
keep_only_tags =[]
|
||||
keep_only_tags.append(dict(name = 'table', attrs = {'id' : 'title-table'}))
|
||||
keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-header'}))
|
||||
keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-content'}))
|
||||
keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'def element-autor'}))
|
||||
'''
|
||||
|
||||
preprocess_regexps = [(re.compile(r'style="display: none;"'), lambda match: ''),
|
||||
(re.compile(r'display: block;'), lambda match: ''),
|
||||
(re.compile(r'\<td\>\<tr\>\<\/table\>'), lambda match: ''),
|
||||
(re.compile(r'\<table .*?\>'), lambda match: ''),
|
||||
(re.compile(r'\<tr>'), lambda match: ''),
|
||||
(re.compile(r'\<td .*?\>'), lambda match: ''),
|
||||
(re.compile(r'\<div id="footer"\>.*?\</footer\>'), lambda match: '')]
|
||||
|
||||
remove_tags =[]
|
||||
remove_tags.append(dict(name = 'div', attrs = {'class' : 'def element-date'}))
|
||||
remove_tags.append(dict(name = 'div', attrs = {'class' : 'def silver'}))
|
||||
remove_tags.append(dict(name = 'div', attrs = {'id' : 'content-main-column-right'}))
|
||||
|
||||
keep_only_tags = [dict(attrs={'class':'art-area'})]
|
||||
remove_tags = [dict(attrs={'class':'add300x250'})]
|
||||
|
||||
extra_css = '''.div-header {font-size: x-small; font-weight: bold}'''
|
||||
#h2 {font-size: x-large; font-weight: bold}
|
||||
|
@@ -13,6 +13,7 @@ class ZTS(BasicNewsRecipe):
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
remove_empty_feeds = True
|
||||
use_embedded_content = False
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'post postcontent'})]
|
||||
remove_tags = [dict(name='div', attrs={'class':'dolna-ramka'})]
|
||||
feeds = [(u'Strona g\u0142\xf3wna', u'http://feeds.feedburner.com/ZaufanaTrzeciaStronaGlowna'), (u'Drobiazgi', u'http://feeds.feedburner.com/ZaufanaTrzeciaStronaDrobiazgi')]
|
||||
|
Loading…
x
Reference in New Issue
Block a user