mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Various Polish recipes by Artur Stachecki
This commit is contained in:
commit
6712594a3e
48
recipes/antyweb.recipe
Normal file
48
recipes/antyweb.recipe
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class AntywebRecipe(BasicNewsRecipe):
    '''Calibre news recipe for Antyweb (antyweb.pl).

    Articles are taken from the site's FeedBurner feed.  Only the article
    headline and body are kept, a handful of widgets/banners are dropped,
    and text-only hyperlinks are flattened to plain text.
    '''

    encoding = 'utf-8'
    __license__ = 'GPL v3'
    __author__ = u'Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
    version = 1
    title = u'Antyweb'
    category = u'News'
    description = u'Blog o internecie i nowych technologiach'
    cover_url = ''
    remove_empty_feeds = True
    auto_cleanup = False
    no_stylesheets = True
    use_embedded_content = False
    oldest_article = 1
    max_articles_per_feed = 100
    remove_javascript = True
    simultaneous_downloads = 3

    # Only the headline and the article body survive.
    keep_only_tags = [
        dict(name='h1', attrs={'class': 'mm-article-title'}),
        dict(name='div', attrs={'class': 'mm-article-content'}),
    ]

    remove_tags = [
        dict(name='h2', attrs={'class': 'widgettitle'}),
        dict(name='img', attrs={'class': 'alignleft'}),
        # The value is an inline CSS declaration, so it is matched against
        # the ``style`` attribute.  The original rule compared it with
        # ``class``, where such text would not appear, making the rule a
        # no-op.
        dict(name='div', attrs={'style': 'float: right;margin-left:1em;margin-bottom: 0.5em;padding-bottom: 3px; width: 72px;'}),
        dict(name='img', attrs={'src': 'http://antyweb.pl/wp-content/uploads/2011/09/HOSTERSI_testy_pasek600x30.gif'}),
        dict(name='div', attrs={'class': 'podwpisowe'}),
    ]

    extra_css = '\nbody {font-family: verdana, arial, helvetica, geneva, sans-serif ;}\n'

    feeds = [
        (u'Artykuly', u'feed://feeds.feedburner.com/Antyweb?format=xml'),
    ]

    def preprocess_html(self, soup):
        '''Replace text-only links with their text.

        Every ``<a>`` element whose ``string`` is not ``None`` is replaced
        in the tree by that string; all other links are left untouched.

        :param soup: parsed article document
        :return: the same ``soup`` object, modified in place
        '''
        for alink in soup.findAll('a'):
            if alink.string is not None:
                alink.replaceWith(alink.string)
        return soup
|
50
recipes/bankier_pl.recipe
Normal file
50
recipes/bankier_pl.recipe
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__author__ = 'teepel <teepel44@gmail.com>'
|
||||||
|
|
||||||
|
'''
|
||||||
|
bankier.pl
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class bankier(BasicNewsRecipe):
    '''Calibre news recipe for Bankier.pl.

    Articles are listed through the site's FeedBurner feeds and downloaded
    in their printer-friendly form (see :meth:`print_version`).
    '''

    title = u'Bankier.pl'
    __author__ = 'teepel <teepel44@gmail.com>'
    language = 'pl'
    description = 'Polski portal finansowy. Informacje o: gospodarka, inwestowanie, finanse osobiste, prowadzenie firmy, kursy walut, notowania akcji, fundusze.'
    masthead_url = 'http://www.bankier.pl/gfx/hd-mid-02.gif'
    # Not referenced anywhere else in this recipe.
    INDEX = 'http://bankier.pl/'
    remove_empty_feeds = True
    oldest_article = 1
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    simultaneous_downloads = 5

    keep_only_tags = [
        dict(name='div', attrs={'align': 'left'}),
    ]

    remove_tags = [
        dict(name='table', attrs={'cellspacing': '2'}),
        dict(name='div', attrs={'align': 'center'}),
        dict(name='img', attrs={'src': '/gfx/hd-mid-02.gif'}),
        # Rules the original author left disabled:
        # dict(name='a', attrs={'target': '_blank'}),
        # dict(name='br', attrs={'clear': 'all'}),
    ]

    feeds = [
        (u'Wiadomości dnia', u'http://feeds.feedburner.com/bankier-wiadomosci-dnia'),
        (u'Finanse osobiste', u'http://feeds.feedburner.com/bankier-finanse-osobiste'),
        (u'Firma', u'http://feeds.feedburner.com/bankier-firma'),
        (u'Giełda', u'http://feeds.feedburner.com/bankier-gielda'),
        (u'Rynek walutowy', u'http://feeds.feedburner.com/bankier-rynek-walutowy'),
        (u'Komunikaty ze spółek', u'http://feeds.feedburner.com/bankier-espi'),
    ]

    def print_version(self, url):
        '''Return the printer-friendly URL for the article at ``url``.

        The article id is the text following the last ``-`` in the final
        path component of ``url``, cut at the first ``.`` (the file
        extension).  Any query string or fragment is ignored.

        The previous implementation picked the third dot-separated piece
        of the whole URL, which only worked for hosts of the form
        ``www.<name>.<tld>`` and for titles without dots, and raised
        ``IndexError`` on URLs with fewer than three such pieces.

        :param url: article URL, e.g. ``.../Some-title-1234567.html``
        :return: URL of the print view for that article id
        '''
        # Drop fragment and query, then isolate the last path component.
        path = url.split('#', 1)[0].split('?', 1)[0]
        page = path.rstrip('/').rsplit('/', 1)[-1]
        # "<slug>-<id>.<ext>" -> "<id>.<ext>" -> "<id>"
        article_id = page.rsplit('-', 1)[-1].split('.', 1)[0]
        return 'http://www.bankier.pl/wiadomosci/print.html?article_id=' + article_id
|
||||||
|
|
35
recipes/f1_ultra.recipe
Normal file
35
recipes/f1_ultra.recipe
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
import re
|
||||||
|
|
||||||
|
class f1ultra(BasicNewsRecipe):
    '''Calibre news recipe for F1 Ultra (f1ultra.pl).

    Downloads the articles listed in the site's RSS feed, keeps the
    ``div#main`` container and strips layout-related markup so the text
    reflows cleanly.
    '''

    title = u'Formuła 1 - F1 ultra'
    __license__ = 'GPL v3'
    __author__ = 'MrStefan <mrstefaan@gmail.com>, Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
    description = u'Formuła 1, Robert Kubica, F3, GP2 oraz inne serie wyścigowe.'
    masthead_url = 'http://www.f1ultra.pl/templates/f1ultra/images/logo.gif'
    remove_empty_feeds = True
    oldest_article = 1
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True

    keep_only_tags = [dict(name='div', attrs={'id': 'main'})]
    remove_tags_after = [dict(attrs={'style': 'margin-top:5px;margin-bottom:5px;display: inline;'})]
    remove_tags = [
        dict(attrs={'class': ['buttonheading', 'avPlayerContainer', 'createdate']}),
        dict(attrs={'title': ['PDF', 'Drukuj', 'Email']}),
        dict(name='form', attrs={'method': 'post'}),
        dict(name='hr', attrs={'size': '2'}),
    ]

    # Each pair is (compiled pattern, replacement callable); every match is
    # replaced by the empty string.
    preprocess_regexps = [
        (re.compile(r'align="left"'), lambda match: ''),
        (re.compile(r'align="right"'), lambda match: ''),
        # Strip complete width="..." attributes.  The original pattern
        # r'width=\"*\"' used a glob-style '*', which in a regex only
        # matched 'width="' / 'width=""' and left real values in place.
        (re.compile(r'width="[^"]*"'), lambda match: ''),
        (re.compile(r'\<table .*?\>'), lambda match: ''),
    ]

    extra_css = (
        '.contentheading { font-size: 1.4em; font-weight: bold; }\n'
        'img { display: block; clear: both;}\n'
    )

    # NOTE(review): several entries below are CSS property names rather
    # than HTML attribute names -- confirm they have any effect.
    remove_attributes = ['width', 'height', 'position', 'float', 'padding-left', 'padding-right', 'padding', 'text-align']

    feeds = [(u'F1 Ultra', u'http://www.f1ultra.pl/index.php?option=com_rd_rss&id=1&Itemid=245')]
|
@ -8,7 +8,6 @@ krakow.gazeta.pl
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
import re
|
|
||||||
|
|
||||||
class gw_krakow(BasicNewsRecipe):
|
class gw_krakow(BasicNewsRecipe):
|
||||||
title = u'Gazeta.pl Kraków'
|
title = u'Gazeta.pl Kraków'
|
||||||
@ -46,7 +45,7 @@ class gw_krakow(BasicNewsRecipe):
|
|||||||
remove_tags.append(dict(name = 'div', attrs = {'id' : 'gazeta_article_buttons'}))
|
remove_tags.append(dict(name = 'div', attrs = {'id' : 'gazeta_article_buttons'}))
|
||||||
|
|
||||||
remove_tags_after = [dict(name = 'div', attrs = {'id' : 'gazeta_article_share'})]
|
remove_tags_after = [dict(name = 'div', attrs = {'id' : 'gazeta_article_share'})]
|
||||||
|
|
||||||
feeds = [(u'Wiadomości', u'http://rss.gazeta.pl/pub/rss/krakow.xml')]
|
feeds = [(u'Wiadomości', u'http://rss.gazeta.pl/pub/rss/krakow.xml')]
|
||||||
|
|
||||||
def skip_ad_pages(self, soup):
|
def skip_ad_pages(self, soup):
|
||||||
|
@ -8,7 +8,6 @@ warszawa.gazeta.pl
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
import re
|
|
||||||
|
|
||||||
class gw_wawa(BasicNewsRecipe):
|
class gw_wawa(BasicNewsRecipe):
|
||||||
title = u'Gazeta.pl Warszawa'
|
title = u'Gazeta.pl Warszawa'
|
||||||
@ -43,7 +42,7 @@ class gw_wawa(BasicNewsRecipe):
|
|||||||
remove_tags.append(dict(name = 'div', attrs = {'class' : 'gazeta_article_related_new'}))
|
remove_tags.append(dict(name = 'div', attrs = {'class' : 'gazeta_article_related_new'}))
|
||||||
remove_tags.append(dict(name = 'div', attrs = {'class' : 'gazetaVideoPlayer'}))
|
remove_tags.append(dict(name = 'div', attrs = {'class' : 'gazetaVideoPlayer'}))
|
||||||
remove_tags.append(dict(name = 'div', attrs = {'id' : 'gazeta_article_miniatures'}))
|
remove_tags.append(dict(name = 'div', attrs = {'id' : 'gazeta_article_miniatures'}))
|
||||||
|
|
||||||
feeds = [(u'Wiadomości', u'http://rss.gazeta.pl/pub/rss/warszawa.xml')]
|
feeds = [(u'Wiadomości', u'http://rss.gazeta.pl/pub/rss/warszawa.xml')]
|
||||||
|
|
||||||
def skip_ad_pages(self, soup):
|
def skip_ad_pages(self, soup):
|
||||||
|
BIN
recipes/icons/antyweb.png
Normal file
BIN
recipes/icons/antyweb.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 668 B |
BIN
recipes/icons/bankier_pl.png
Normal file
BIN
recipes/icons/bankier_pl.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 190 B |
BIN
recipes/icons/f1_ultra.png
Normal file
BIN
recipes/icons/f1_ultra.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 490 B |
BIN
recipes/icons/myapple_pl.png
Normal file
BIN
recipes/icons/myapple_pl.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 1.1 KiB |
BIN
recipes/icons/telepolis_pl.png
Normal file
BIN
recipes/icons/telepolis_pl.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 1.2 KiB |
49
recipes/myapple_pl.recipe
Normal file
49
recipes/myapple_pl.recipe
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class MyAppleRecipe(BasicNewsRecipe):
    '''Calibre news recipe for MyApple.pl.

    Articles come from the site's RSS export; only ``div#article_content``
    is kept, and text-only hyperlinks are flattened to plain text.
    '''

    __license__ = 'GPL v3'
    __author__ = u'Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
    version = 1

    title = u'MyApple.pl'
    category = u'News'
    description = u' Największy w Polsce serwis zajmujący się tematyką związaną z Apple i wszelkimi produktami tej firmy.'
    cover_url = ''
    remove_empty_feeds = True
    # Previously assigned twice with the same value; one assignment kept.
    no_stylesheets = True
    oldest_article = 7
    max_articles_per_feed = 100000
    recursions = 0

    remove_javascript = True
    simultaneous_downloads = 3

    keep_only_tags = [
        dict(name='div', attrs={'id': 'article_content'}),
    ]

    remove_tags = [
        dict(name='div', attrs={'class': 'article_author_date_comment_container'}),
        dict(name='div', attrs={'class': 'fullwidth'}),
        dict(name='div', attrs={'class': 'cmslinks'}),
        dict(name='div', attrs={'class': 'googleads-468'}),
        dict(name='div', attrs={'id': 'comments'}),
    ]

    extra_css = (
        '\n'
        'body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}\n'
        'td.contentheading{font-size: large; font-weight: bold;}\n'
    )

    feeds = [
        # The query parameter is ``sectionid``; the text "&sect" had been
        # mis-decoded as an HTML entity, corrupting the URL.
        ('News', 'feed://myapple.pl/external.php?do=rss&type=newcontent&sectionid=1&days=120&count=10'),
    ]

    def preprocess_html(self, soup):
        '''Replace text-only links with their text.

        Every ``<a>`` element whose ``string`` is not ``None`` is replaced
        in the tree by that string; all other links are left untouched.

        :param soup: parsed article document
        :return: the same ``soup`` object, modified in place
        '''
        for alink in soup.findAll('a'):
            if alink.string is not None:
                alink.replaceWith(alink.string)
        return soup
|
67
recipes/telepolis_pl.recipe
Normal file
67
recipes/telepolis_pl.recipe
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
class telepolis(BasicNewsRecipe):
    '''Calibre news recipe for Telepolis.pl.

    News and articles are listed through the site's RSS feeds and fetched
    in their print layout; site-specific markup remnants are removed with
    regular expressions before parsing, and thumbnails are swapped for
    the full-size images afterwards.
    '''

    title = u'Telepolis.pl'
    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
    # Adjacent literals instead of backslash continuations inside a single
    # literal: the latter folded each following source line (including any
    # leading whitespace) straight into the text, without a separating
    # space after the punctuation.
    description = (
        u'Twój telekomunikacyjny serwis informacyjny. '
        u'Codzienne informacje, testy i artykuły, '
        u'promocje, baza telefonów oraz centrum rozrywki'
    )
    oldest_article = 7
    masthead_url = 'http://telepolis.pl/i/telepolis-logo2.gif'
    max_articles_per_feed = 100
    simultaneous_downloads = 5
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False

    remove_tags = [
        dict(attrs={'alt': 'TELEPOLIS.pl'}),
    ]

    # Each pair is (compiled pattern, replacement callable); every match is
    # replaced by the empty string.
    preprocess_regexps = [
        (re.compile(r'<: .*? :>'), lambda match: ''),
        (re.compile(r'<b>Zobacz:</b>.*?</a>', re.DOTALL), lambda match: ''),
        (re.compile(r'<-ankieta.*?>'), lambda match: ''),
        (re.compile(r'\(Q\!\)'), lambda match: ''),
        (re.compile(r'\(plik.*?\)'), lambda match: ''),
        (re.compile(r'<br.*?><br.*?>', re.DOTALL), lambda match: ''),
    ]

    extra_css = '''.tb { font-weight: bold; font-size: 20px;}'''

    feeds = [
        (u'Wiadomości', u'http://www.telepolis.pl/rss/news.php'),
        (u'Artykuły', u'http://www.telepolis.pl/rss/artykuly.php'),
    ]

    def print_version(self, url):
        '''Return the print-layout URL for ``url``.

        ``news.php`` is rewritten to ``news_print.php``; for any other URL
        ``artykuly.php`` is rewritten to ``art_print.php``.  A URL that
        contains neither is returned unchanged.
        '''
        if 'news.php' in url:
            return url.replace('news.php', 'news_print.php')
        return url.replace('artykuly.php', 'art_print.php')

    def preprocess_html(self, soup):
        '''Clean up a downloaded print page.

        * Rewrites every image source containing ``m.jpg`` to the same
          path with ``.jpg`` instead.
        * Removes the first ``<tr>`` of the document (named ``logo`` by
          the original author -- presumably the masthead row).
        * Removes every ``<tr>`` whose text contains one of the footer
          markers.

        Images without a ``src`` attribute and pages without any ``<tr>``
        are tolerated instead of raising.

        :param soup: parsed article document
        :return: the result of ``self.adeify_images(soup)``
        '''
        for image in soup.findAll('img'):
            src = image.get('src')
            if src and 'm.jpg' in src:
                image['src'] = src.replace('m.jpg', '.jpg')

        logo = soup.find('tr')
        if logo is not None:
            logo.extract()

        # Unicode literals, like every other non-ASCII string in this class.
        markers = (u'Wiadomość wydrukowana', u'copyright')
        for tag in soup.findAll('tr'):
            text = self.tag_to_string(tag)
            if any(marker in text for marker in markers):
                tag.extract()

        return self.adeify_images(soup)
|
Loading…
x
Reference in New Issue
Block a user