Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)
Various updated Polish recipes
This commit is contained in:
Parent: 8822ef28f9
Commit: 3afc065c2a
@ -1,6 +1,6 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
class Benchmark_pl(BasicNewsRecipe):
|
||||
class BenchmarkPl(BasicNewsRecipe):
|
||||
title = u'Benchmark.pl'
|
||||
__author__ = 'fenuks'
|
||||
description = u'benchmark.pl -IT site'
|
||||
@ -14,7 +14,7 @@ class Benchmark_pl(BasicNewsRecipe):
|
||||
preprocess_regexps = [(re.compile(ur'<h3><span style="font-size: small;"> Zobacz poprzednie <a href="http://www.benchmark.pl/news/zestawienie/grupa_id/135">Opinie dnia:</a></span>.*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>'), (re.compile(ur'Więcej o .*?</ul>', re.DOTALL|re.IGNORECASE), lambda match: '')]
|
||||
keep_only_tags=[dict(name='div', attrs={'class':['m_zwykly', 'gallery']})]
|
||||
remove_tags_after=dict(name='div', attrs={'class':'body'})
|
||||
remove_tags=[dict(name='div', attrs={'class':['kategoria', 'socialize', 'thumb', 'panelOcenaObserwowane', 'categoryNextToSocializeGallery']}), dict(name='table', attrs={'background':'http://www.benchmark.pl/uploads/backend_img/a/fotki_newsy/opinie_dnia/bg.png'}), dict(name='table', attrs={'width':'210', 'cellspacing':'1', 'cellpadding':'4', 'border':'0', 'align':'right'})]
|
||||
remove_tags=[dict(name='div', attrs={'class':['kategoria', 'socialize', 'thumb', 'panelOcenaObserwowane', 'categoryNextToSocializeGallery', 'breadcrumb']}), dict(name='table', attrs={'background':'http://www.benchmark.pl/uploads/backend_img/a/fotki_newsy/opinie_dnia/bg.png'}), dict(name='table', attrs={'width':'210', 'cellspacing':'1', 'cellpadding':'4', 'border':'0', 'align':'right'})]
|
||||
INDEX= 'http://www.benchmark.pl'
|
||||
feeds = [(u'Aktualności', u'http://www.benchmark.pl/rss/aktualnosci-pliki.xml'),
|
||||
(u'Testy i recenzje', u'http://www.benchmark.pl/rss/testy-recenzje-minirecenzje.xml')]
|
||||
|
@ -1,6 +1,7 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Filmweb_pl(BasicNewsRecipe):
|
||||
import re
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
class FilmWebPl(BasicNewsRecipe):
|
||||
title = u'FilmWeb'
|
||||
__author__ = 'fenuks'
|
||||
description = 'FilmWeb - biggest polish movie site'
|
||||
@ -12,8 +13,9 @@ class Filmweb_pl(BasicNewsRecipe):
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets= True
|
||||
remove_empty_feeds=True
|
||||
preprocess_regexps = [(re.compile(u'\(kliknij\,\ aby powiększyć\)', re.IGNORECASE), lambda m: ''), ]#(re.compile(ur' | ', re.IGNORECASE), lambda m: '')]
|
||||
extra_css = '.hdrBig {font-size:22px;} ul {list-style-type:none; padding: 0; margin: 0;}'
|
||||
remove_tags= [dict(name='div', attrs={'class':['recommendOthers']}), dict(name='ul', attrs={'class':'fontSizeSet'})]
|
||||
remove_tags= [dict(name='div', attrs={'class':['recommendOthers']}), dict(name='ul', attrs={'class':'fontSizeSet'}), dict(attrs={'class':'userSurname anno'})]
|
||||
keep_only_tags= [dict(name='h1', attrs={'class':['hdrBig', 'hdrEntity']}), dict(name='div', attrs={'class':['newsInfo', 'newsInfoSmall', 'reviewContent description']})]
|
||||
feeds = [(u'Wszystkie newsy', u'http://www.filmweb.pl/feed/news/latest'),
|
||||
(u'News / Filmy w produkcji', 'http://www.filmweb.pl/feed/news/category/filminproduction'),
|
||||
@ -31,18 +33,22 @@ class Filmweb_pl(BasicNewsRecipe):
|
||||
(u'News / Kino polskie', u'http://www.filmweb.pl/feed/news/category/polish.cinema'),
|
||||
(u'News / Telewizja', u'http://www.filmweb.pl/feed/news/category/tv'),
|
||||
(u'Recenzje redakcji', u'http://www.filmweb.pl/feed/reviews/latest'),
|
||||
(u'Recenzje użytkowników', u'http://www.filmweb.pl/feed/user-reviews/latest')]
|
||||
(u'Recenzje użytkowników', u'http://www.filmweb.pl/feed/user-reviews/latest')
|
||||
]
|
||||
|
||||
def skip_ad_pages(self, soup):
|
||||
def skip_ad_pages(self, soup):
|
||||
skip_tag = soup.find('a', attrs={'class':'welcomeScreenButton'})
|
||||
if skip_tag is not None:
|
||||
self.log.warn('skip_tag')
|
||||
self.log.warn(skip_tag)
|
||||
return self.index_to_soup(skip_tag['href'], raw=True)
|
||||
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for a in soup('a'):
|
||||
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
||||
a['href']=self.index + a['href']
|
||||
return soup
|
||||
for i in soup.findAll('a', attrs={'class':'fn'}):
|
||||
i.insert(len(i), BeautifulSoup('<br />'))
|
||||
for i in soup.findAll('sup'):
|
||||
if not i.string or i.string.startswith('(kliknij'):
|
||||
i.extract()
|
||||
return soup
|
||||
|
@ -1,6 +1,6 @@
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class Gry_online_pl(BasicNewsRecipe):
|
||||
class GryOnlinePl(BasicNewsRecipe):
|
||||
title = u'Gry-Online.pl'
|
||||
__author__ = 'fenuks'
|
||||
description = 'Gry-Online.pl - computer games'
|
||||
@ -21,17 +21,18 @@ class Gry_online_pl(BasicNewsRecipe):
|
||||
tag = appendtag.find('div', attrs={'class':'n5p'})
|
||||
if tag:
|
||||
nexturls=tag.findAll('a')
|
||||
for nexturl in nexturls[1:]:
|
||||
try:
|
||||
soup2 = self.index_to_soup('http://www.gry-online.pl/S020.asp'+ nexturl['href'])
|
||||
except:
|
||||
soup2 = self.index_to_soup('http://www.gry-online.pl/S022.asp'+ nexturl['href'])
|
||||
url_part = soup.find('link', attrs={'rel':'canonical'})['href']
|
||||
url_part = url_part[25:].rpartition('?')[0]
|
||||
for nexturl in nexturls[1:-1]:
|
||||
soup2 = self.index_to_soup('http://www.gry-online.pl/' + url_part + nexturl['href'])
|
||||
pagetext = soup2.find(attrs={'class':'gc660'})
|
||||
for r in pagetext.findAll(name='header'):
|
||||
r.extract()
|
||||
for r in pagetext.findAll(attrs={'itemprop':'description'}):
|
||||
r.extract()
|
||||
pos = len(appendtag.contents)
|
||||
appendtag.insert(pos, pagetext)
|
||||
for r in appendtag.findAll(attrs={'class':['n5p', 'add-info', 'twitter-share-button']}):
|
||||
for r in appendtag.findAll(attrs={'class':['n5p', 'add-info', 'twitter-share-button', 'lista lista3 lista-gry']}):
|
||||
r.extract()
|
||||
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class NaTemat(BasicNewsRecipe):
|
||||
@ -8,8 +9,9 @@ class NaTemat(BasicNewsRecipe):
|
||||
description = u'informacje, komentarze, opinie'
|
||||
category = 'news'
|
||||
language = 'pl'
|
||||
preprocess_regexps = [(re.compile(ur'Czytaj też\:.*?</a>', re.IGNORECASE), lambda m: ''), (re.compile(ur'Zobacz też\:.*?</a>', re.IGNORECASE), lambda m: ''), (re.compile(ur'Czytaj więcej\:.*?</a>', re.IGNORECASE), lambda m: ''), (re.compile(ur'Czytaj również\:.*?</a>', re.IGNORECASE), lambda m: '')]
|
||||
cover_url= 'http://blog.plona.pl/wp-content/uploads/2012/05/natemat.png'
|
||||
no_stylesheets = True
|
||||
keep_only_tags= [dict(id='main')]
|
||||
remove_tags= [dict(attrs={'class':['button', 'block-inside style_default', 'article-related']})]
|
||||
remove_tags= [dict(attrs={'class':['button', 'block-inside style_default', 'article-related', 'user-header', 'links']}), dict(name='img', attrs={'class':'indent'})]
|
||||
feeds = [(u'Artyku\u0142y', u'http://natemat.pl/rss/wszystkie')]
|
||||
|
@ -1,7 +1,7 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
class AdvancedUserRecipe1312886443(BasicNewsRecipe):
|
||||
class WNP(BasicNewsRecipe):
|
||||
title = u'WNP'
|
||||
cover_url= 'http://k.wnp.pl/images/wnpLogo.gif'
|
||||
__author__ = 'fenuks'
|
||||
@ -12,7 +12,7 @@ class AdvancedUserRecipe1312886443(BasicNewsRecipe):
|
||||
oldest_article = 8
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets= True
|
||||
remove_tags=[dict(attrs={'class':'printF'})]
|
||||
remove_tags=[dict(attrs={'class':['printF', 'border3B2 clearfix', 'articleMenu clearfix']})]
|
||||
feeds = [(u'Wiadomości gospodarcze', u'http://www.wnp.pl/rss/serwis_rss.xml'),
|
||||
(u'Serwis Energetyka - Gaz', u'http://www.wnp.pl/rss/serwis_rss_1.xml'),
|
||||
(u'Serwis Nafta - Chemia', u'http://www.wnp.pl/rss/serwis_rss_2.xml'),
|
||||
|
Loading…
x
Reference in New Issue
Block a user