Lifehacker.pl by MrStefan, Hatalska by teepel and update telepolis.pl
@ -43,6 +43,6 @@ class AntywebRecipe(BasicNewsRecipe):
|
||||
def preprocess_html(self, soup):
|
||||
for alink in soup.findAll('a'):
|
||||
if alink.string is not None:
|
||||
tstr = alink.string
|
||||
alink.replaceWith(tstr)
|
||||
return soup
|
||||
tstr = alink.string
|
||||
alink.replaceWith(tstr)
|
||||
return soup
|
||||
|
@ -24,4 +24,3 @@ class app_funds(BasicNewsRecipe):
|
||||
auto_cleanup = True
|
||||
|
||||
feeds = [(u'blog', u'http://feeds.feedburner.com/blogspot/etVI')]
|
||||
|
||||
|
@ -47,4 +47,3 @@ class bankier(BasicNewsRecipe):
|
||||
segments = urlPart.split('-')
|
||||
urlPart2 = segments[-1]
|
||||
return 'http://www.bankier.pl/wiadomosci/print.html?article_id=' + urlPart2
|
||||
|
||||
|
@ -49,8 +49,8 @@ class gw_krakow(BasicNewsRecipe):
|
||||
feeds = [(u'Wiadomości', u'http://rss.gazeta.pl/pub/rss/krakow.xml')]
|
||||
|
||||
def skip_ad_pages(self, soup):
|
||||
tag=soup.find(name='a', attrs={'class':'btn'})
|
||||
if tag:
|
||||
tag=soup.find(name='a', attrs={'class':'btn'})
|
||||
if tag:
|
||||
new_soup=self.index_to_soup(tag['href'], raw=True)
|
||||
return new_soup
|
||||
|
||||
@ -95,8 +95,7 @@ class gw_krakow(BasicNewsRecipe):
|
||||
rem.extract()
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
self.append_page(soup, soup.body)
|
||||
if soup.find(id='container_gal'):
|
||||
self.gallery_article(soup.body)
|
||||
return soup
|
||||
|
||||
self.append_page(soup, soup.body)
|
||||
if soup.find(id='container_gal'):
|
||||
self.gallery_article(soup.body)
|
||||
return soup
|
||||
|
@ -46,8 +46,8 @@ class gw_wawa(BasicNewsRecipe):
|
||||
feeds = [(u'Wiadomości', u'http://rss.gazeta.pl/pub/rss/warszawa.xml')]
|
||||
|
||||
def skip_ad_pages(self, soup):
|
||||
tag=soup.find(name='a', attrs={'class':'btn'})
|
||||
if tag:
|
||||
tag=soup.find(name='a', attrs={'class':'btn'})
|
||||
if tag:
|
||||
new_soup=self.index_to_soup(tag['href'], raw=True)
|
||||
return new_soup
|
||||
|
||||
@ -92,8 +92,7 @@ class gw_wawa(BasicNewsRecipe):
|
||||
rem.extract()
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
self.append_page(soup, soup.body)
|
||||
if soup.find(id='container_gal'):
|
||||
self.gallery_article(soup.body)
|
||||
return soup
|
||||
|
||||
self.append_page(soup, soup.body)
|
||||
if soup.find(id='container_gal'):
|
||||
self.gallery_article(soup.body)
|
||||
return soup
|
||||
|
@ -100,5 +100,3 @@ class GazetaPomorska(BasicNewsRecipe):
|
||||
|
||||
extra_css = '''h1 { font-size: 1.4em; }
|
||||
h2 { font-size: 1.0em; }'''
|
||||
|
||||
|
||||
|
28
recipes/hatalska.recipe
Normal file
@ -0,0 +1,28 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = 'teepel 2012'
|
||||
|
||||
'''
|
||||
hatalska.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
class hatalska(BasicNewsRecipe):
    # calibre news recipe for the hatalska.com blog.

    # --- Identity / presentation -------------------------------------
    title = u'Hatalska'
    __author__ = 'teepel <teepel44@gmail.com>'
    description = u'Blog specjalistki z branży mediowo-reklamowej - Natalii Hatalskiej'
    language = 'pl'
    masthead_url = 'http://hatalska.com/wp-content/themes/jamel/images/logo.png'

    # --- Fetch limits --------------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 100
    simultaneous_downloads = 5

    # --- Clean-up of downloaded pages ----------------------------------
    no_stylesheets = True
    remove_javascript = True
    # Single entry: the <div class="feedflare"> block.
    remove_tags = [
        dict(name='div', attrs={'class': 'feedflare'}),
    ]

    # --- Sources: (feed title, feed URL) pairs -------------------------
    feeds = [
        (u'Blog', u'http://feeds.feedburner.com/hatalskacom'),
    ]
|
Before Width: | Height: | Size: 475 B After Width: | Height: | Size: 1.1 KiB |
BIN
recipes/icons/hatalska.png
Normal file
After Width: | Height: | Size: 387 B |
BIN
recipes/icons/lifehacker_pl.png
Normal file
After Width: | Height: | Size: 1.1 KiB |
BIN
recipes/icons/money_pl.png
Normal file
After Width: | Height: | Size: 428 B |
Before Width: | Height: | Size: 5.1 KiB After Width: | Height: | Size: 1.4 KiB |
Before Width: | Height: | Size: 1.7 KiB After Width: | Height: | Size: 1018 B |
Before Width: | Height: | Size: 1.7 KiB After Width: | Height: | Size: 1018 B |
@ -47,6 +47,6 @@ class KrytykaPolitycznaRecipe(BasicNewsRecipe):
|
||||
def preprocess_html(self, soup):
|
||||
for alink in soup.findAll('a'):
|
||||
if alink.string is not None:
|
||||
tstr = alink.string
|
||||
alink.replaceWith(tstr)
|
||||
tstr = alink.string
|
||||
alink.replaceWith(tstr)
|
||||
return soup
|
||||
|
25
recipes/lifehacker_pl.recipe
Normal file
@ -0,0 +1,25 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = 'MrStefan'
|
||||
|
||||
'''
|
||||
www.lifehacking.pl
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
class lifehacking(BasicNewsRecipe):
    # calibre news recipe for Lifehacker Polska (lifehacking.pl).

    # --- Identity / presentation -------------------------------------
    title = u'Lifehacker Polska'
    __author__ = 'MrStefan <mrstefaan@gmail.com>'
    description = u'Lifehacking - sposoby na zwiększanie własnej wydajności. Ułatwiaj sobie życie, wykorzystując wiedzę, metody, technologie, przydatne strony ...'
    language = 'pl'
    masthead_url = 'http://lifehacking.pl/wp-content/themes/lifehacking/images/lifehackerlogo.png'

    # --- Fetch limits --------------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True

    # --- Clean-up of downloaded pages ----------------------------------
    no_stylesheets = True
    remove_javascript = True

    # --- Sources: (feed title, feed URL) pairs -------------------------
    feeds = [
        (u'Lifehacker polska', u'http://feeds.feedburner.com/pl_lh'),
    ]
|
@ -8,8 +8,6 @@ michalkiewicz.pl
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
#
|
||||
|
||||
class michalkiewicz(BasicNewsRecipe):
|
||||
title = u'Stanis\u0142aw Michalkiewicz'
|
||||
description = u'Strona autorska * felietony * artyku\u0142y * komentarze'
|
||||
@ -23,4 +21,3 @@ class michalkiewicz(BasicNewsRecipe):
|
||||
remove_tags = [dict(name='ul', attrs={'class':'menu'})]
|
||||
|
||||
feeds = [(u'Teksty', u'http://www.michalkiewicz.pl/rss.xml')]
|
||||
|
||||
|
@ -60,7 +60,7 @@ class FocusRecipe(BasicNewsRecipe):
|
||||
]
|
||||
|
||||
def print_version(self, url):
|
||||
if url.count ('money.pl.feedsportal.com'):
|
||||
if url.count ('money.pl.feedsportal.com'):
|
||||
u = url.find('0Cartykul0C')
|
||||
u = 'http://www.m.money.pl/wiadomosci/artykul/' + url[u + 21:]
|
||||
u = u.replace('0C', '/')
|
||||
@ -71,6 +71,6 @@ class FocusRecipe(BasicNewsRecipe):
|
||||
u = u.replace ('0B','.')
|
||||
u = u.replace (',0,',',-1,')
|
||||
u = u.replace('0Tutm0Isource0Frss0Gutm0Imedium0Frss0Gutm0Icampaign0Frss/story01.htm', '')
|
||||
else:
|
||||
else:
|
||||
u = url.replace('/nc/1','/do-druku/1')
|
||||
return u
|
||||
return u
|
||||
|
@ -44,6 +44,6 @@ class MyAppleRecipe(BasicNewsRecipe):
|
||||
def preprocess_html(self, soup):
|
||||
for alink in soup.findAll('a'):
|
||||
if alink.string is not None:
|
||||
tstr = alink.string
|
||||
alink.replaceWith(tstr)
|
||||
tstr = alink.string
|
||||
alink.replaceWith(tstr)
|
||||
return soup
|
||||
|
@ -37,4 +37,3 @@ class rynek_kolejowy(BasicNewsRecipe):
|
||||
segment = url.split('/')
|
||||
urlPart = segment[3]
|
||||
return 'http://www.rynek-kolejowy.pl/drukuj.php?id=' + urlPart
|
||||
|
||||
|
@ -70,5 +70,3 @@ class RzeczpospolitaRecipe(BasicNewsRecipe):
|
||||
forget, sep, index = rest.rpartition(',')
|
||||
|
||||
return start + '/' + index + '?print=tak'
|
||||
|
||||
|
||||
|
@ -8,10 +8,7 @@ class SATKurier(BasicNewsRecipe):
|
||||
title = u'SATKurier.pl'
|
||||
__author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
|
||||
language = 'pl'
|
||||
description = u'Największy i najstarszy serwis poświęcony\
|
||||
telewizji cyfrowej, przygotowywany przez wydawcę\
|
||||
miesięcznika SAT Kurier. Bieżące wydarzenia\
|
||||
z rynku mediów i nowych technologii.'
|
||||
description = u'Serwis poświęcony telewizji cyfrowej'
|
||||
oldest_article = 7
|
||||
masthead_url = 'http://satkurier.pl/img/header_sk_logo.gif'
|
||||
max_articles_per_feed = 100
|
||||
|
@ -22,4 +22,3 @@ class swiatczytnikow(BasicNewsRecipe):
|
||||
remove_tags = [dict(name = 'ul', attrs = {'class' : 'similar-posts'})]
|
||||
|
||||
preprocess_regexps = [(re.compile(u'<h3>Czytaj dalej:</h3>'), lambda match: '')]
|
||||
|
||||
|
@ -8,60 +8,20 @@ import re
|
||||
|
||||
class telepolis(BasicNewsRecipe):
|
||||
title = u'Telepolis.pl'
|
||||
__author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
|
||||
__author__ = 'Artur Stachecki <artur.stachecki@gmail.com>, Tomasz Długosz <tomek3d@gmail.com>'
|
||||
|
||||
language = 'pl'
|
||||
description = u'Twój telekomunikacyjny serwis informacyjny.\
|
||||
Codzienne informacje, testy i artykuły,\
|
||||
promocje, baza telefonów oraz centrum rozrywki'
|
||||
oldest_article = 7
|
||||
description = u'Twój telekomunikacyjny serwis informacyjny.'
|
||||
masthead_url = 'http://telepolis.pl/i/telepolis-logo2.gif'
|
||||
max_articles_per_feed = 100
|
||||
simultaneous_downloads = 5
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
|
||||
remove_tags = []
|
||||
remove_tags.append(dict(attrs={'alt': 'TELEPOLIS.pl'}))
|
||||
|
||||
preprocess_regexps = [(re.compile(r'<: .*? :>'),
|
||||
lambda match: ''),
|
||||
(re.compile(r'<b>Zobacz:</b>.*?</a>', re.DOTALL),
|
||||
lambda match: ''),
|
||||
(re.compile(r'<-ankieta.*?>'),
|
||||
lambda match: ''),
|
||||
(re.compile(r'\(Q\!\)'),
|
||||
lambda match: ''),
|
||||
(re.compile(r'\(plik.*?\)'),
|
||||
lambda match: ''),
|
||||
(re.compile(r'<br.*?><br.*?>', re.DOTALL),
|
||||
lambda match: '')
|
||||
]
|
||||
|
||||
extra_css = '''.tb { font-weight: bold; font-size: 20px;}'''
|
||||
|
||||
feeds = [
|
||||
(u'Wiadomości', u'http://www.telepolis.pl/rss/news.php'),
|
||||
(u'Artykuły', u'http://www.telepolis.pl/rss/artykuly.php')
|
||||
(u'Wiadomości', u'http://www.telepolis.pl/rss/news.php')#,
|
||||
#(u'Artykuły', u'http://www.telepolis.pl/rss/artykuly.php')
|
||||
]
|
||||
|
||||
def print_version(self, url):
|
||||
if 'news.php' in url:
|
||||
print_url = url.replace('news.php', 'news_print.php')
|
||||
else:
|
||||
print_url = url.replace('artykuly.php', 'art_print.php')
|
||||
return print_url
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for image in soup.findAll('img'):
|
||||
if 'm.jpg' in image['src']:
|
||||
image_big = image['src']
|
||||
image_big = image_big.replace('m.jpg', '.jpg')
|
||||
image['src'] = image_big
|
||||
logo = soup.find('tr')
|
||||
logo.extract()
|
||||
for tag in soup.findAll('tr'):
|
||||
for strings in ['Wiadomość wydrukowana', 'copyright']:
|
||||
if strings in self.tag_to_string(tag):
|
||||
tag.extract()
|
||||
return self.adeify_images(soup)
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class':'flol w510'}),
|
||||
dict(name='div', attrs={'class':'main_tresc_news'})
|
||||
]
|
||||
|