mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
add some recipes and icons from kalibrator, part 1
This commit is contained in:
parent
5d9b34cd17
commit
98b7cd3e4b
BIN
recipes/icons/mateusz_czytania.png
Normal file
BIN
recipes/icons/mateusz_czytania.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 1.1 KiB |
BIN
recipes/icons/rushisaband.png
Normal file
BIN
recipes/icons/rushisaband.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 965 B |
BIN
recipes/icons/rynek_infrastruktury.png
Normal file
BIN
recipes/icons/rynek_infrastruktury.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 820 B |
BIN
recipes/icons/rynek_kolejowy.png
Normal file
BIN
recipes/icons/rynek_kolejowy.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 330 B |
BIN
recipes/icons/satkurier.png
Normal file
BIN
recipes/icons/satkurier.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 1.2 KiB |
36
recipes/kerrang.recipe
Normal file
36
recipes/kerrang.recipe
Normal file
@ -0,0 +1,36 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
|
||||
class kerrang(BasicNewsRecipe):
    """Calibre news recipe for the Kerrang! blog RSS feed."""

    # --- Publication metadata ---
    title = u'Kerrang!'
    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
    description = u'UK-based magazine devoted to rock music published by Bauer Media Group'
    language = 'en'
    masthead_url = 'http://images.kerrang.com/design/kerrang/kerrangsite/logo.gif'

    # --- Download settings ---
    oldest_article = 7
    max_articles_per_feed = 100
    simultaneous_downloads = 5
    recursions = 0
    use_embedded_content = False

    # --- Page clean-up ---
    no_stylesheets = True
    remove_javascript = True
    # Only elements carrying one of these classes are kept.
    keep_only_tags = [dict(attrs={'class': ['headz', 'blktxt']})]

    extra_css = (
        ' img { display: block; margin-right: auto;}\n'
        'h1 {text-align: left; font-size: 22px;}'
    )

    feeds = [(u'News', u'http://www.kerrang.com/blog/rss.xml')]

    def preprocess_html(self, soup):
        """Flatten text-only links into plain text.

        Every ``<a>`` whose ``.string`` is not ``None`` is replaced by that
        string; other anchors are left untouched. The (mutated) soup is
        returned.
        """
        for anchor in soup.findAll('a'):
            text = anchor.string
            if text is None:
                continue
            anchor.replaceWith(text)
        return soup
|
46
recipes/lequipe.recipe
Normal file
46
recipes/lequipe.recipe
Normal file
@ -0,0 +1,46 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
|
||||
class leequipe(BasicNewsRecipe):
    """Calibre news recipe for the L'Equipe sports RSS feeds."""

    # --- Publication metadata ---
    title = u'l\'equipe'
    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
    description = u'Retrouvez tout le sport en direct sur le site de L\'EQUIPE et suivez l\'actualité du football, rugby, basket, cyclisme, f1, volley, hand, tous les résultats sportifs'
    language = 'fr'
    masthead_url = 'http://static.lequipe.fr/v6/img/logo-lequipe.png'

    # --- Download settings ---
    oldest_article = 1
    max_articles_per_feed = 100
    simultaneous_downloads = 5
    recursions = 0
    use_embedded_content = False

    # --- Page clean-up ---
    no_stylesheets = True
    remove_javascript = True
    # Keep only the element whose id is "article" ...
    keep_only_tags = [dict(attrs={'id': ['article']})]
    # ... and strip these ids / classes from it.
    remove_tags = [
        dict(attrs={'id': ['partage', 'ensavoirplus', 'bloc_bas_breve', 'commentaires', 'tools']}),
        dict(attrs={'class': ['partage_bis', 'date']}),
    ]

    feeds = [
        (u'Football', u'http://www.lequipe.fr/rss/actu_rss_Football.xml'),
        (u'Auto-Moto', u'http://www.lequipe.fr/rss/actu_rss_Auto-Moto.xml'),
        (u'Tennis', u'http://www.lequipe.fr/rss/actu_rss_Tennis.xml'),
        (u'Golf', u'http://www.lequipe.fr/rss/actu_rss_Golf.xml'),
        (u'Rugby', u'http://www.lequipe.fr/rss/actu_rss_Rugby.xml'),
        (u'Basket', u'http://www.lequipe.fr/rss/actu_rss_Basket.xml'),
        (u'Hand', u'http://www.lequipe.fr/rss/actu_rss_Hand.xml'),
        (u'Cyclisme', u'http://www.lequipe.fr/rss/actu_rss_Cyclisme.xml'),
        (u'Autres Sports', u'http://pipes.yahoo.com/pipes/pipe.run?_id=2039f7f4f350c70c5e4e8633aa1b37cd&_render=rss'),
    ]

    def preprocess_html(self, soup):
        """Flatten text-only links into plain text.

        Every ``<a>`` whose ``.string`` is not ``None`` is replaced by that
        string; other anchors are left untouched. The (mutated) soup is
        returned.
        """
        for anchor in soup.findAll('a'):
            text = anchor.string
            if text is None:
                continue
            anchor.replaceWith(text)
        return soup
|
37
recipes/mateusz_czytania.recipe
Normal file
37
recipes/mateusz_czytania.recipe
Normal file
@ -0,0 +1,37 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'teepel <teepel44@gmail.com>'
|
||||
|
||||
'''
|
||||
http://www.mateusz.pl/czytania
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
class czytania_mateusz(BasicNewsRecipe):
    """Calibre news recipe for the daily readings feed of mateusz.pl."""

    title = u'Czytania na ka\u017cdy dzie\u0144'
    __author__ = 'teepel <teepel44@gmail.com>'
    description = u'Codzienne czytania z jednego z najstarszych polskich serwisów katolickich.'
    language = 'pl'
    # Not referenced anywhere in this class; kept in case external code reads it.
    INDEX = 'http://www.mateusz.pl/czytania'
    oldest_article = 1
    remove_empty_feeds = True
    no_stylesheets = True
    # Previously assigned twice in this class body; one assignment is enough.
    auto_cleanup = True
    remove_javascript = True
    simultaneous_downloads = 2
    max_articles_per_feed = 100

    feeds = [(u'Czytania', u'http://mateusz.pl/rss/czytania/')]

    remove_tags = [dict(name='p', attrs={'class': 'top'})]

    # thanks t3d
    def get_article_url(self, article):
        """Return the article link, or ``None`` to reject the entry.

        Entries whose link contains ``kmt.pl`` are rejected. Entries with no
        link at all are rejected as well instead of raising ``TypeError`` on
        the substring test.

        :param article: feed entry object exposing ``get('link')``.
        :return: the link string, or ``None`` when the entry is rejected.
        """
        link = article.get('link')
        if not link or 'kmt.pl' in link:
            return None
        return link
|
61
recipes/naszdziennik.recipe
Normal file
61
recipes/naszdziennik.recipe
Normal file
@ -0,0 +1,61 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class naszdziennik(BasicNewsRecipe):
    """Calibre news recipe for Nasz Dziennik, built by scraping its news page."""

    title = u'Nasz Dziennik'
    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
    description =u'Nasz Dziennik - Ogólnopolska gazeta codzienna. Podejmuje tematykę dotyczącą życia społecznego, kulturalnego, politycznego i religijnego. Propaguje wartości chrześcijańskie oraz tradycję i kulturę polską.'
    masthead_url = 'http://www.naszdziennik.pl/images/logo-male.png'
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True

    keep_only_tags = [dict(attrs={'id': 'article'})]

    def parse_index(self):
        """Scrape the news listing and group its articles by section.

        Each element with class ``news-article`` contributes one article. Its
        section name is the text of the first ``<h4>`` inside the element;
        the title and link come from the first ``<a>`` inside the nested
        ``title-datetime`` element, and the date from that element's first
        ``<h4>``. Items lacking the ``title-datetime`` element, the link, or
        its ``href`` are skipped rather than aborting the whole parse.

        :return: list of ``(section, articles)`` tuples in order of first
            appearance; each article is a dict with the keys ``'title'``,
            ``'url'`` and ``'date'``.
        """
        # Page that lists the articles to download.
        soup = self.index_to_soup('http://www.naszdziennik.pl/news')
        feeds = []
        # Maps section name -> list of article dicts.
        articles = {}
        # Section names in the order they were first seen.
        sections = []

        for item in soup.findAll(attrs={'class': 'news-article'}):
            # Locate the title/date container and the link inside it first,
            # so an incomplete item never creates an empty section.
            article_title_datetime = item.find(attrs={'class': 'title-datetime'})
            if article_title_datetime is None:
                continue
            article_a = article_title_datetime.find('a')
            if article_a is None:
                continue
            href = article_a.get('href')
            if not href:
                continue

            # The section name is the text of the item's first <h4>.
            section = self.tag_to_string(item.find('h4'))
            # `in` works on both Python 2 and 3; dict.has_key() is Python 2 only.
            if section not in articles:
                sections.append(section)
                articles[section] = []

            # The href is appended to the site root to form the article URL.
            article_url = 'http://naszdziennik.pl' + href
            # The link text serves as the article title.
            article_title = self.tag_to_string(article_a)
            # The date is the text of the first <h4> inside title-datetime.
            article_date = self.tag_to_string(article_title_datetime.find('h4'))
            articles[section].append({'title': article_title, 'url': article_url, 'date': article_date})

        # Emit the sections in first-seen order.
        for section in sections:
            feeds.append((section, articles[section]))
        # calibre consumes this list to download the articles.
        return feeds
|
29
recipes/rushisaband.recipe
Normal file
29
recipes/rushisaband.recipe
Normal file
@ -0,0 +1,29 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'MrStefan <mrstefaan@gmail.com>'
|
||||
|
||||
'''
|
||||
www.rushisaband.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
class rushisaband(BasicNewsRecipe):
    """Calibre news recipe for the "Rush is a Band" blog feed."""

    # --- Publication metadata ---
    title = u'Rushisaband'
    __author__ = 'MrStefan <mrstefaan@gmail.com>'
    description =u'A blog devoted to the band RUSH and its members, Neil Peart, Geddy Lee and Alex Lifeson'
    language = 'en'

    # --- Download settings ---
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True

    # --- Page clean-up ---
    no_stylesheets = True
    remove_javascript = True
    # Keep only h4, h5 and p elements of each page.
    keep_only_tags = [
        dict(name='h4'),
        dict(name='h5'),
        dict(name='p'),
    ]

    feeds = [(u'Rush is a Band', u'http://feeds2.feedburner.com/rushisaband/blog')]
|
42
recipes/rynek_infrastruktury.recipe
Normal file
42
recipes/rynek_infrastruktury.recipe
Normal file
@ -0,0 +1,42 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'teepel <teepel44@gmail.com>'
|
||||
|
||||
'''
|
||||
http://www.rynekinfrastruktury.pl
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
# NOTE(review): the class name does not match the publication (Rynek
# Infrastruktury) and looks copy-pasted; it is kept because it is this
# block's public name.
class prawica_recipe(BasicNewsRecipe):
    """Calibre news recipe for the Rynek Infrastruktury RSS feeds."""

    # --- Publication metadata ---
    title = u'Rynek Infrastruktury'
    __author__ = 'teepel <teepel44@gmail.com>'
    description =u'Portal "Rynek Infrastruktury" to źródło informacji o kluczowych elementach polskiej gospodarki: drogach, kolei, lotniskach, portach, telekomunikacji, energetyce, prawie i polityce, wzmocnione eksperckimi komentarzami kluczowych analityków.'
    language = 'pl'

    # --- Download settings ---
    oldest_article = 1
    max_articles_per_feed = 100
    remove_empty_feeds = True

    # --- Page clean-up ---
    no_stylesheets = True
    remove_javascript = True
    keep_only_tags = [dict(name='div', attrs={'class': 'articleContent'})]
    remove_tags = [dict(name='span', attrs={'class': 'date'})]

    feeds = [
        (u'Drogi', u'http://www.rynekinfrastruktury.pl/rss/41'),
        (u'Lotniska', u'http://www.rynekinfrastruktury.pl/rss/42'),
        (u'Kolej', u'http://www.rynekinfrastruktury.pl/rss/37'),
        (u'Energetyka', u'http://www.rynekinfrastruktury.pl/rss/30'),
        (u'Telekomunikacja', u'http://www.rynekinfrastruktury.pl/rss/31'),
        (u'Porty', u'http://www.rynekinfrastruktury.pl/rss/32'),
        (u'Prawo i polityka', u'http://www.rynekinfrastruktury.pl/rss/47'),
        (u'Komentarze', u'http://www.rynekinfrastruktury.pl/rss/38'),
    ]

    def print_version(self, url):
        """Map an article URL to its print URL.

        The ``/artykul/`` prefix is swapped for ``/artykul/drukuj/``; a URL
        that does not contain the article prefix is returned unchanged.
        """
        article_prefix = 'http://www.rynekinfrastruktury.pl/artykul/'
        printable_prefix = 'http://www.rynekinfrastruktury.pl/artykul/drukuj/'
        return url.replace(article_prefix, printable_prefix)
|
41
recipes/rynek_kolejowy.recipe
Normal file
41
recipes/rynek_kolejowy.recipe
Normal file
@ -0,0 +1,41 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'teepel <teepel44@gmail.com>'
|
||||
|
||||
'''
|
||||
rynek-kolejowy.pl
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
class rynek_kolejowy(BasicNewsRecipe):
    """Calibre news recipe for the Rynek Kolejowy RSS feed."""

    # --- Publication metadata ---
    title = u'Rynek Kolejowy'
    __author__ = 'teepel <teepel44@gmail.com>'
    description =u'Rynek Kolejowy - kalendarium wydarzeń branży kolejowej, konferencje, sympozja, targi kolejowe, krajowe i zagraniczne.'
    language = 'pl'
    masthead_url = 'http://p.wnp.pl/images/i/partners/rynek_kolejowy.gif'

    # --- Download settings ---
    oldest_article = 1
    max_articles_per_feed = 100
    remove_empty_feeds = True

    # --- Page clean-up ---
    no_stylesheets = True
    remove_javascript = True
    keep_only_tags = [dict(name='div', attrs={'id': 'mainContent'})]
    remove_tags = [
        dict(name='div', attrs={'class': 'right no-print'}),
        dict(name='div', attrs={'id': 'font-size'}),
        dict(name='div', attrs={'class': 'no-print'}),
    ]

    extra_css = '''.wiadomosc_title{ font-size: 1.4em; font-weight: bold; }'''

    feeds = [(u'Wiadomości', u'http://www.rynek-kolejowy.pl/rss/rss.php')]

    def print_version(self, url):
        """Build the print-page URL for an article.

        The piece at index 3 of ``url`` split on ``'/'`` is appended to the
        ``drukuj.php?id=`` endpoint. Raises ``IndexError`` when the URL has
        fewer than four ``'/'``-separated pieces.
        """
        pieces = url.split('/')
        return 'http://www.rynek-kolejowy.pl/drukuj.php?id=' + pieces[3]
|
||||
|
49
recipes/satkurier.recipe
Normal file
49
recipes/satkurier.recipe
Normal file
@ -0,0 +1,49 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
|
||||
class SATKurier(BasicNewsRecipe):
    """Calibre news recipe for the SATKurier.pl RSS feeds."""

    title = u'SATKurier.pl'
    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
    # Adjacent literals instead of backslash continuations inside one literal:
    # the text no longer depends on how the continuation lines are indented.
    description = (u'Największy i najstarszy serwis poświęcony '
                   u'telewizji cyfrowej, przygotowywany przez wydawcę '
                   u'miesięcznika SAT Kurier. Bieżące wydarzenia '
                   u'z rynku mediów i nowych technologii.')
    oldest_article = 7
    masthead_url = 'http://satkurier.pl/img/header_sk_logo.gif'
    max_articles_per_feed = 100
    simultaneous_downloads = 5
    remove_javascript = True
    no_stylesheets = True

    keep_only_tags = [dict(name='div', attrs={'id': ['single_news', 'content']})]

    remove_tags = [
        dict(attrs={'id': ['news_info', 'comments']}),
        dict(attrs={'href': '#czytaj'}),
        dict(attrs={'align': 'center'}),
        dict(attrs={'class': ['date', 'category', 'right mini-add-comment', 'socialLinks', 'commentlist']}),
    ]

    remove_tags_after = [dict(id='entry')]

    feeds = [
        (u'Najnowsze wiadomości', u'http://feeds.feedburner.com/satkurierpl?format=xml'),
        (u'Sport w telewizji', u'http://feeds.feedburner.com/satkurier/sport?format=xml'),
        (u'Blog', u'http://feeds.feedburner.com/satkurier/blog?format=xml'),
    ]

    @staticmethod
    def _child_index(node):
        """Return the position of ``node`` among its parent's children.

        Matching is by identity, so an equal-looking sibling is never
        mistaken for ``node``.
        """
        for position, sibling in enumerate(node.parent.contents):
            if sibling is node:
                return position
        raise ValueError('node is not attached to its parent')

    def preprocess_html(self, soup):
        """Move the thumbnail below the headline and flatten text-only links.

        The element with id ``news_mini_photo`` (if any) is detached and
        re-inserted directly after the first ``<h1>`` that remains in the
        document. When no ``<h1>`` exists the thumbnail is put back where it
        was, instead of failing with ``AttributeError``. The nodes themselves
        are moved; the heading is no longer replaced by a prettified markup
        string.
        NOTE(review): newer BeautifulSoup versions appear to escape inserted
        strings on output, which would have rendered that markup as literal
        text; confirm against the bundled version.

        Afterwards every ``<a>`` whose ``.string`` is not ``None`` is replaced
        by that string.

        :param soup: parsed article page.
        :return: the same soup object, modified in place.
        """
        image = soup.find(attrs={'id': 'news_mini_photo'})
        if image is not None:
            # Remember where the thumbnail was so it can be restored.
            target_parent = image.parent
            target_position = self._child_index(image)
            image.extract()
            # Searched after detaching, so a heading nested inside the
            # thumbnail can never become the insertion target.
            header = soup.find('h1')
            if header is not None:
                target_parent = header.parent
                target_position = self._child_index(header) + 1
            target_parent.insert(target_position, image)

        for alink in soup.findAll('a'):
            if alink.string is not None:
                alink.replaceWith(alink.string)
        return soup
|
Loading…
x
Reference in New Issue
Block a user