Merge from trunk

Charles Haley 2012-11-26 14:03:32 +01:00
commit f3e7bd0cba
414 changed files with 144799 additions and 95695 deletions

@@ -39,3 +39,45 @@ recipes/.git
recipes/.gitignore
recipes/README
recipes/katalog_egazeciarz.recipe
recipes/tv_axnscifi.recipe
recipes/tv_comedycentral.recipe
recipes/tv_discoveryscience.recipe
recipes/tv_foxlife.recipe
recipes/tv_fox.recipe
recipes/tv_hbo.recipe
recipes/tv_kinopolska.recipe
recipes/tv_nationalgeographic.recipe
recipes/tv_polsat2.recipe
recipes/tv_polsat.recipe
recipes/tv_tv4.recipe
recipes/tv_tvn7.recipe
recipes/tv_tvn.recipe
recipes/tv_tvp1.recipe
recipes/tv_tvp2.recipe
recipes/tv_tvphd.recipe
recipes/tv_tvphistoria.recipe
recipes/tv_tvpkultura.recipe
recipes/tv_tvppolonia.recipe
recipes/tv_tvpuls.recipe
recipes/tv_viasathistory.recipe
recipes/icons/tv_axnscifi.png
recipes/icons/tv_comedycentral.png
recipes/icons/tv_discoveryscience.png
recipes/icons/tv_foxlife.png
recipes/icons/tv_fox.png
recipes/icons/tv_hbo.png
recipes/icons/tv_kinopolska.png
recipes/icons/tv_nationalgeographic.png
recipes/icons/tv_polsat2.png
recipes/icons/tv_polsat.png
recipes/icons/tv_tv4.png
recipes/icons/tv_tvn7.png
recipes/icons/tv_tvn.png
recipes/icons/tv_tvp1.png
recipes/icons/tv_tvp2.png
recipes/icons/tv_tvphd.png
recipes/icons/tv_tvphistoria.png
recipes/icons/tv_tvpkultura.png
recipes/icons/tv_tvppolonia.png
recipes/icons/tv_tvpuls.png
recipes/icons/tv_viasathistory.png

@@ -19,6 +19,144 @@
# new recipes:
#   - title:

- version: 0.9.7
  date: 2012-11-23

  new features:
    - title: "Edit metadata dialog: Show the size of the current book cover in the edit metadata dialog."
      tickets: [1079781]
    - title: "Get Books: Allow easy searching by title and author in addition to any keyword, to prevent large numbers of spurious matches."
    - title: "An option to automatically convert any added book to the current output format, found under Preferences->Adding books"
    - title: "E-book viewer: Allow viewing tables in a separate popup window by right clicking on the table and selecting 'View table'. Useful for reference books that have lots of large tables."
      tickets: [1080710]
    - title: "Catalogs: Add the current library name as an available field when generating catalogs in csv/xml format."
      tickets: [1078422]
    - title: "Enable colored text in the output from the command line tools on Windows"
    - title: "E-book viewer: Add an option to hide the help message when entering full screen mode"
    - title: "E-book viewer: Add an option to always start the viewer in full screen mode"
    - title: "E-book viewer: Add many more controls to the context menu, particularly useful in full screen mode"
    - title: "E-book viewer: Allow easy searching of the selected word or phrase in Google via the context menu"
    - title: "Add a new type of FileType plugin, postimport, that runs after a book has been added to the database."
    - title: "Get Books: Remove Gandalf store, add Publio store. Update the Legimi store plugin for website changes"

  bug fixes:
    - title: "Conversion: Correctly handle values of left and right for the deprecated align attribute of images, mapping them to the CSS float property instead of to text-align."
      tickets: [1081094]
    - title: "MOBI Output: When generating joint MOBI6/KF8 files, do not set incorrect display CSS values for tables in the KF8 part"
    - title: "Connect to iTunes: Ignore AAC audio files."
      tickets: [1081096]
    - title: "E-book viewer: Fix restoring from fullscreen not respecting maximized window state"
    - title: "Fix rows in the device books view sometimes being too high"
    - title: "Catalogs: Fix a problem occurring when merging comments with a custom field whose type is a list."
    - title: "Linux binary: Use exec in the wrapper shell scripts that are used to set env vars and launch calibre utilities."
      tickets: [1077884]
    - title: "E-book viewer: Fix blank pages after every page when viewing some comic files in paged mode"
    - title: "E-book viewer: When printing, respect the specified page range."
      tickets: [1074220]
    - title: "Font subsetting: Parse the GSUB table for glyph substitution rules and do not remove any glyphs that could act as substitutes. Keep zero-length glyphs, like the glyphs for non-printable characters, when subsetting TrueType outlines."
    - title: "Smarten punctuation: Fix self-closing script tags causing smarten punctuation to fail"

  improved recipes:
    - Arguments and facts
    - Business Standard
    - The New Yorker

  new recipes:
    - title: Various Czech and Hungarian news sources
      author: bubak
    - title: Various Polish recipes
      author: Artur Stachecki
    - title: Buchreport
      author: a.peter
    - title: Red Voltaire
      author: atordo
    - title: Autosport
      author: Mr Stefan
    - title: House News
      author: Eddie Lau

- version: 0.9.6
  date: 2012-11-10

  new features:
    - title: "Experimental support for subsetting fonts"
      description: "Subsetting a font means reducing the font to contain only the glyphs for the text actually present in the book. This can easily halve the size of the font. calibre can now do this for all embedded fonts during a conversion. Turn it on via the 'Subset all embedded fonts' option under the Look & Feel section of the conversion dialog. calibre can subset both TrueType and OpenType fonts. Note that this code is very new and likely has bugs, so please check the output if you turn on subsetting. The conversion log will have info about the subsetting operations."
      type: major
    - title: "EPUB Input: Try to work around EPUBs that have missing or damaged ZIP central directories. calibre should now be able to read/convert such an EPUB file, provided it does not suffer from further corruption."
    - title: "Allow using identifiers in save to disk templates."
      tickets: [1074623]
    - title: "calibredb: Add an option to not notify the GUI"
    - title: "Catalogs: Fix long tags causing catalog generation to fail on Windows. Add the ability to cross-reference authors, i.e. to relist the authors for a book with multiple authors separately."
      tickets: [1074931]
    - title: "Edit metadata dialog: Add a clear tags button to remove all tags with a single click"
    - title: "Add search to the font family chooser dialog"

  bug fixes:
    - title: "Windows: Fix a long-standing bug in the device eject code that for some reason only manifested in 0.9.5."
      tickets: [1075782]
    - title: "Get Books: Fix Amazon stores, Google Books store and libri.de"
    - title: "Kobo driver: More fixes for on-device book matching; list books as being on the device even if the Kobo has not yet indexed them. Also some performance improvements."
      tickets: [1069617]
    - title: "EPUB Output: Remove duplicate id and name attributes to eliminate pointless noise from the various EPUB check utilities"
    - title: "Ask for confirmation before removing plugins"
    - title: "Fix the bulk convert queueing dialog becoming very long if any of the books have a very long title."
      tickets: [1076191]
    - title: "Fix deleting tags-like custom column data from the Tag browser not updating the last modified timestamp for affected books"
      tickets: [1075476]
    - title: "When updating a previously broken plugin, do not show an error message because the previous version of the plugin cannot be loaded"
    - title: "Fix regression that broke the Template Editor"

  improved recipes:
    - Various updated Polish recipes
    - London Review of Books
    - Yemen Times

  new recipes:
    - title: "Various Polish news sources"
      author: Artur Stachecki

- version: 0.9.5
  date: 2012-11-02
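
Two entries above lend themselves to a concrete illustration. The 0.9.7 entry adds a
postimport FileType plugin type; below is a minimal sketch of what such a plugin could
look like. The class name, file types and body are invented for illustration, and the
exact hook signature should be checked against the calibre plugin documentation for
this release:

    from calibre.customize import FileTypePlugin

    class PostImportLogger(FileTypePlugin):
        # Hypothetical example plugin; all names here are illustrative
        name                = 'Post Import Logger'
        description         = 'Log books after they are added to the database'
        supported_platforms = ['windows', 'osx', 'linux']
        author              = 'Example Author'
        version             = (1, 0, 0)
        file_types          = set(['epub', 'mobi'])
        on_postimport       = True  # run after the book is in the database

        def postimport(self, book_id, book_format, db):
            # book_id: calibre database id; book_format: e.g. 'EPUB';
            # db: the database API object passed in by calibre
            print('Book %d added in format %s' % (book_id, book_format))

The 0.9.6 font subsetting feature should also be available during command line
conversion via the --subset-embedded-fonts switch of ebook-convert (worth verifying
against ebook-convert --help), e.g. ebook-convert book.epub book.azw3 --subset-embedded-fonts.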

@@ -649,20 +649,24 @@ If it still won't launch, start a command prompt (press the Windows key and R; th
Post any output you see in a help message on the `Forum <http://www.mobileread.com/forums/forumdisplay.php?f=166>`_.
|app| freezes/crashes occasionally?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
There are a few possible things I know of that can cause this:
    * You recently connected an external monitor or TV to your computer. In
      this case, whenever |app| opens a new window like the edit metadata
      window or the conversion dialog, it appears on the second monitor where
      you don't notice it and so you think |app| has frozen. Disconnect your
      second monitor and restart calibre.
    * You are using a Wacom branded mouse. There is an incompatibility between
      Wacom mice and the graphics toolkit |app| uses. Try using a non-Wacom
      mouse.
    * If you use RoboForm, it is known to cause |app| to crash. Add |app| to
      the blacklist of programs inside RoboForm to fix this. Or uninstall
      RoboForm.
    * Sometimes if some software has installed lots of new files in your fonts
      folder, |app| can crash until it finishes indexing them. Just start |app|,
      then leave it alone for about 20 minutes, without clicking on anything.
      After that you should be able to use |app| as normal.
|app| is not starting on OS X?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -721,8 +725,8 @@ You can switch |app| to using a backed up library folder by simply clicking the
If you want to backup the |app| configuration/plugins, you have to backup the config directory. You can find this config directory via :guilabel:`Preferences->Miscellaneous`. Note that restoring configuration directories is not officially supported, but should work in most cases. Just copy the contents of the backup directory into the current configuration directory to restore.
How do I use purchased EPUB books with |app| (or what do I do with .acsm files)?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Most purchased EPUB books have `DRM <http://drmfree.calibre-ebook.com/about#drm>`_. This prevents |app| from opening them. You can still use |app| to store and transfer them to your ebook reader. First, you must authorize your reader on a Windows machine with Adobe Digital Editions. Once this is done, EPUB books transferred with |app| will work fine on your reader. When you purchase an EPUB book from a website, you will get an ".acsm" file. This file should be opened with Adobe Digital Editions, which will then download the actual ".epub" ebook. The ebook file will be stored in the folder "My Digital Editions", from where you can add it to |app|.
I am getting a "Permission Denied" error?

@@ -1,5 +1,5 @@
__license__ = 'GPL v3'
__copyright__ = '2010 - 2012, Darko Miletic <darko.miletic at gmail.com>'
'''
www.aif.ru
'''

@@ -19,12 +19,19 @@ class AIF_ru(BasicNewsRecipe):
    encoding = 'cp1251'
    language = 'ru'
    publication_type = 'magazine'
    masthead_url = 'http://static3.aif.ru/glossy/index/i/logo.png'
    extra_css = """
        @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
        body{font-family: Verdana,Arial,Helvetica,sans1,sans-serif}
        img{display: block}
    """
    keep_only_tags = [
        dict(name='div', attrs={'class': ['content-header', 'zoom']})
       ,dict(name='div', attrs={'id': 'article-text'})
    ]
    remove_tags = [
        dict(name=['iframe','object','link','base','input','meta'])
       ,dict(name='div', attrs={'class': 'in-topic'})
    ]

    feeds = [(u'News', u'http://www.aif.ru/rss/all.php')]

@@ -0,0 +1,69 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
import re


class aktualneRecipe(BasicNewsRecipe):
    __author__ = 'bubak'
    title = u'aktualne.cz'
    publisher = u'Centrum holdings'
    description = 'aktuálně.cz'
    oldest_article = 1
    max_articles_per_feed = 20

    feeds = [
        (u'Domácí', u'http://aktualne.centrum.cz/feeds/rss/domaci/?photo=0'),
        (u'Zprávy', u'http://aktualne.centrum.cz/feeds/rss/zpravy/?photo=0'),
        (u'Praha', u'http://aktualne.centrum.cz/feeds/rss/domaci/regiony/praha/?photo=0'),
        (u'Ekonomika', u'http://aktualne.centrum.cz/feeds/rss/ekonomika/?photo=0'),
        (u'Finance', u'http://aktualne.centrum.cz/feeds/rss/finance/?photo=0'),
        (u'Blogy a názory', u'http://blog.aktualne.centrum.cz/export-all.php')
    ]

    language = 'cs'
    cover_url = 'http://img.aktualne.centrum.cz/design/akt4/o/l/logo-akt-ciste.png'
    remove_javascript = True
    no_stylesheets = True
    remove_attributes = []
    remove_tags_before = dict(name='h1', attrs={'class': ['titulek-clanku']})
    filter_regexps = [r'img.aktualne.centrum.cz']
    remove_tags = [dict(name='div', attrs={'id': ['social-bookmark']}),
                   dict(name='div', attrs={'class': ['box1', 'svazane-tagy']}),
                   dict(name='div', attrs={'class': 'itemcomment id0'}),
                   dict(name='div', attrs={'class': 'hlavicka'}),
                   dict(name='div', attrs={'class': 'hlavni-menu'}),
                   dict(name='div', attrs={'class': 'top-standard-brand-obal'}),
                   dict(name='div', attrs={'class': 'breadcrumb'}),
                   dict(name='div', attrs={'id': 'start-standard'}),
                   dict(name='div', attrs={'id': 'forum'}),
                   dict(name='span', attrs={'class': 'akce'}),
                   dict(name='span', attrs={'class': 'odrazka vetsi'}),
                   dict(name='div', attrs={'class': 'boxP'}),
                   dict(name='div', attrs={'class': 'box2'})]
    preprocess_regexps = [
        (re.compile(r'<div class="(contenttitle"|socialni-site|wiki|facebook-promo|facebook-like-button"|meta-akce).*', re.DOTALL|re.IGNORECASE), lambda match: '</body>'),
        (re.compile(r'<div class="[^"]*poutak-clanek-trojka".*', re.DOTALL|re.IGNORECASE), lambda match: '</body>')]
    keep_only_tags = []

    visited_urls = {}

    def get_article_url(self, article):
        # The feeds overlap, so skip any URL we have already seen
        url = BasicNewsRecipe.get_article_url(self, article)
        if url in self.visited_urls:
            self.log.debug('Ignoring duplicate: ' + url)
            return None
        else:
            self.visited_urls[url] = True
            self.log.debug('Accepting: ' + url)
            return url

    def encoding(self, source):
        # Blog articles are UTF-8, the main site is ISO-8859-2
        if source.newurl.find('blog.aktualne') >= 0:
            enc = 'utf-8'
        else:
            enc = 'iso-8859-2'
        self.log.debug('Called encoding ' + enc + ' ' + str(source.newurl))
        return source.decode(enc, 'replace')

recipes/antyweb.recipe (new file, 48 lines)

@@ -0,0 +1,48 @@
from calibre.web.feeds.news import BasicNewsRecipe


class AntywebRecipe(BasicNewsRecipe):
    encoding = 'utf-8'
    __license__ = 'GPL v3'
    __author__ = u'Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
    version = 1
    title = u'Antyweb'
    category = u'News'
    description = u'Blog o internecie i nowych technologiach'
    cover_url = ''
    remove_empty_feeds = True
    auto_cleanup = False
    no_stylesheets = True
    use_embedded_content = False
    oldest_article = 1
    max_articles_per_feed = 100
    remove_javascript = True
    simultaneous_downloads = 3

    keep_only_tags = []
    keep_only_tags.append(dict(name='h1', attrs={'class': 'mm-article-title'}))
    keep_only_tags.append(dict(name='div', attrs={'class': 'mm-article-content'}))

    remove_tags = []
    remove_tags.append(dict(name='h2', attrs={'class': 'widgettitle'}))
    remove_tags.append(dict(name='img', attrs={'class': 'alignleft'}))
    # This selector matches on an inline style string, not a class name
    remove_tags.append(dict(name='div', attrs={'style': 'float: right;margin-left:1em;margin-bottom: 0.5em;padding-bottom: 3px; width: 72px;'}))
    remove_tags.append(dict(name='img', attrs={'src': 'http://antyweb.pl/wp-content/uploads/2011/09/HOSTERSI_testy_pasek600x30.gif'}))
    remove_tags.append(dict(name='div', attrs={'class': 'podwpisowe'}))

    extra_css = '''
        body {font-family: verdana, arial, helvetica, geneva, sans-serif;}
    '''

    feeds = [
        (u'Artykuly', u'feed://feeds.feedburner.com/Antyweb?format=xml'),
    ]

    def preprocess_html(self, soup):
        # Replace links with their plain text so articles read cleanly offline
        for alink in soup.findAll('a'):
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
        return soup
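
A note on trying the new recipes in this commit: a recipe file can be test-driven from
the command line instead of through the GUI. A minimal sketch of that workflow, assuming
the recipe above was saved locally as antyweb.recipe:

    # Fetch only a couple of articles per feed (--test) and convert to
    # EPUB with verbose logging:
    ebook-convert antyweb.recipe output.epub --test -vv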

recipes/app_funds.recipe (new file, 27 lines)

@@ -0,0 +1,27 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'teepel <teepel44@gmail.com>'

'''
appfunds.blogspot.com
'''

from calibre.web.feeds.news import BasicNewsRecipe


class app_funds(BasicNewsRecipe):
    title = u'APP Funds'
    __author__ = 'teepel <teepel44@gmail.com>'
    language = 'pl'
    description = 'Blog inwestora dla inwestorów i oszczędzających'
    INDEX = 'http://appfunds.blogspot.com'
    remove_empty_feeds = True
    oldest_article = 7
    max_articles_per_feed = 100
    simultaneous_downloads = 5
    remove_javascript = True
    no_stylesheets = True
    auto_cleanup = True

    feeds = [(u'blog', u'http://feeds.feedburner.com/blogspot/etVI')]

recipes/autosport.recipe (new file, 30 lines)

@@ -0,0 +1,30 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'MrStefan <mrstefaan@gmail.com>'

'''
www.autosport.com
'''

from calibre.web.feeds.news import BasicNewsRecipe


class autosport(BasicNewsRecipe):
    title = u'Autosport'
    __author__ = 'MrStefan <mrstefaan@gmail.com>'
    language = 'en_GB'
    description = u'Daily Formula 1 and motorsport news from the leading weekly motor racing magazine. The authority on Formula 1, F1, MotoGP, GP2, Champ Car, Le Mans...'
    masthead_url = 'http://cdn.images.autosport.com/asdotcom.gif'
    remove_empty_feeds = True
    oldest_article = 1
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True

    keep_only_tags = []
    keep_only_tags.append(dict(name='h1', attrs={'class': 'news_headline'}))
    keep_only_tags.append(dict(name='td', attrs={'class': 'news_article_author'}))
    keep_only_tags.append(dict(name='td', attrs={'class': 'news_article_date'}))
    keep_only_tags.append(dict(name='p'))

    feeds = [(u'ALL NEWS', u'http://www.autosport.com/rss/allnews.xml')]

recipes/bankier_pl.recipe (new file, 50 lines)

@@ -0,0 +1,50 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'teepel <teepel44@gmail.com>'

'''
bankier.pl
'''

from calibre.web.feeds.news import BasicNewsRecipe


class bankier(BasicNewsRecipe):
    title = u'Bankier.pl'
    __author__ = 'teepel <teepel44@gmail.com>'
    language = 'pl'
    description = 'Polski portal finansowy. Informacje o: gospodarka, inwestowanie, finanse osobiste, prowadzenie firmy, kursy walut, notowania akcji, fundusze.'
    masthead_url = 'http://www.bankier.pl/gfx/hd-mid-02.gif'
    INDEX = 'http://bankier.pl/'
    remove_empty_feeds = True
    oldest_article = 1
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    simultaneous_downloads = 5

    keep_only_tags = []
    keep_only_tags.append(dict(name='div', attrs={'align': 'left'}))

    remove_tags = []
    remove_tags.append(dict(name='table', attrs={'cellspacing': '2'}))
    remove_tags.append(dict(name='div', attrs={'align': 'center'}))
    remove_tags.append(dict(name='img', attrs={'src': '/gfx/hd-mid-02.gif'}))
    #remove_tags.append(dict(name='a', attrs={'target': '_blank'}))
    #remove_tags.append(dict(name='br', attrs={'clear': 'all'}))

    feeds = [
        (u'Wiadomości dnia', u'http://feeds.feedburner.com/bankier-wiadomosci-dnia'),
        (u'Finanse osobiste', u'http://feeds.feedburner.com/bankier-finanse-osobiste'),
        (u'Firma', u'http://feeds.feedburner.com/bankier-firma'),
        (u'Giełda', u'http://feeds.feedburner.com/bankier-gielda'),
        (u'Rynek walutowy', u'http://feeds.feedburner.com/bankier-rynek-walutowy'),
        (u'Komunikaty ze spółek', u'http://feeds.feedburner.com/bankier-espi'),
    ]

    def print_version(self, url):
        # Article URLs end in a numeric id; the print page takes that id
        # as the article_id query parameter
        segment = url.split('.')
        urlPart = segment[2]
        segments = urlPart.split('-')
        urlPart2 = segments[-1]
        return 'http://www.bankier.pl/wiadomosci/print.html?article_id=' + urlPart2
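
To make the string surgery in print_version concrete, here is what it does to a
hypothetical article URL (the URL shape is an assumption inferred from the code,
not taken from the site):

    # url = 'http://www.bankier.pl/wiadomosci/Jakis-tytul-artykulu-2596596.html'
    # url.split('.')            -> ['http://www', 'bankier',
    #                               'pl/wiadomosci/Jakis-tytul-artykulu-2596596', 'html']
    # segment[2].split('-')[-1] -> '2596596'
    # result: 'http://www.bankier.pl/wiadomosci/print.html?article_id=2596596'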

recipes/blesk.recipe (new file, 55 lines)

@@ -0,0 +1,55 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
import re


class bleskRecipe(BasicNewsRecipe):
    __author__ = 'bubak'
    title = u'Blesk'
    publisher = u''
    description = 'blesk.cz'
    oldest_article = 1
    max_articles_per_feed = 20
    use_embedded_content = False

    feeds = [
        (u'Zprávy', u'http://www.blesk.cz/rss/7'),
        (u'Blesk', u'http://www.blesk.cz/rss/1'),
        (u'Sex a tabu', u'http://www.blesk.cz/rss/2'),
        (u'Celebrity', u'http://www.blesk.cz/rss/5'),
        (u'Cestování', u'http://www.blesk.cz/rss/12')
    ]

    #encoding = 'iso-8859-2'
    language = 'cs'
    cover_url = 'http://img.blesk.cz/images/blesk/blesk-logo.png'
    remove_javascript = True
    no_stylesheets = True
    extra_css = """
    """
    remove_attributes = []
    remove_tags_before = dict(name='div', attrs={'id': ['boxContent']})
    remove_tags_after = dict(name='div', attrs={'class': ['artAuthors']})
    remove_tags = [dict(name='div', attrs={'class': ['link_clanek']}),
                   dict(name='div', attrs={'id': ['partHeader']}),
                   dict(name='div', attrs={'id': ['top_bottom_box', 'lista_top']})]
    preprocess_regexps = [(re.compile(r'<div class="(textovytip|related)".*', re.DOTALL|re.IGNORECASE), lambda match: '</body>')]
    keep_only_tags = [dict(name='div', attrs={'class': 'articleContent'})]

    visited_urls = {}

    def get_article_url(self, article):
        # The feeds overlap, so skip URLs that were already listed
        url = BasicNewsRecipe.get_article_url(self, article)
        if url in self.visited_urls:
            self.log.debug('Ignoring duplicate: ' + url)
            return None
        else:
            self.visited_urls[url] = True
            self.log.debug('Accepting: ' + url)
            return url

recipes/blognexto.recipe (new file, 28 lines)

@@ -0,0 +1,28 @@
from calibre.web.feeds.news import BasicNewsRecipe


class blognexto(BasicNewsRecipe):
    title = 'BLOG.NEXTO.pl'
    __author__ = 'MrStefan <mrstefaan@gmail.com>'
    language = 'pl'
    description = 'o e-publikacjach prawie wszystko'
    masthead_url = 'http://blog.nexto.pl/wp-content/uploads/2012/04/logo-blog-nexto.pl_.jpg'
    remove_empty_feeds = True
    oldest_article = 7
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True

    keep_only_tags = []
    keep_only_tags.append(dict(name='div', attrs={'id': 'content'}))

    remove_tags = []
    remove_tags.append(dict(name='div', attrs={'class': 'comment-cloud'}))
    remove_tags.append(dict(name='p', attrs={'class': 'post-date1'}))
    remove_tags.append(dict(name='div', attrs={'class': 'fb-like'}))
    remove_tags.append(dict(name='div', attrs={'class': 'tags'}))
    remove_tags.append(dict(name='div', attrs={'class': 'postnavi'}))
    remove_tags.append(dict(name='div', attrs={'class': 'commments-box'}))
    remove_tags.append(dict(name='div', attrs={'id': 'respond'}))

    feeds = [('Artykuly', 'http://feeds.feedburner.com/blognexto')]

recipes/brewiarz.recipe (new file, 140 lines)

@@ -0,0 +1,140 @@
#!/usr/bin/env python
__license__ = 'GPL v3'

from calibre.web.feeds.news import BasicNewsRecipe
import datetime, re


class brewiarz(BasicNewsRecipe):
    title = u'Brewiarz'
    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
    description = u'Serwis poświęcony Liturgii Godzin (brewiarzowi) - formie codziennej modlitwy Kościoła katolickiego.'
    masthead_url = 'http://brewiarz.pl/images/logo2.gif'
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    publication_type = 'newspaper'
    next_days = 1

    def parse_index(self):
        # Build the index from date-based URLs: the site organises pages as
        # /<month in roman numerals>_<two-digit year>/<ddmm>/index.php3
        dec2rom_dict = {"01": "i", "02": "ii", "03": "iii", "04": "iv",
                        "05": "v", "06": "vi", "07": "vii", "08": "viii",
                        "09": "ix", "10": "x", "11": "xi", "12": "xii"}

        weekday_dict = {"Sunday": "Niedziela", "Monday": "Poniedziałek", "Tuesday": "Wtorek",
                        "Wednesday": "Środa", "Thursday": "Czwartek", "Friday": "Piątek", "Saturday": "Sobota"}

        now = datetime.datetime.now()

        feeds = []
        for i in range(0, self.next_days):
            url_date = now + datetime.timedelta(days=i)
            url_date_month = url_date.strftime("%m")
            url_date_month_roman = dec2rom_dict[url_date_month]
            url_date_day = url_date.strftime("%d")
            url_date_year = url_date.strftime("%Y")[2:]
            url_date_weekday = url_date.strftime("%A")
            url_date_weekday_pl = weekday_dict[url_date_weekday]

            url = "http://brewiarz.pl/" + url_date_month_roman + "_" + url_date_year + "/" + url_date_day + url_date_month + "/index.php3"
            articles = self.parse_pages(url)
            if articles:
                title = url_date_weekday_pl + " " + url_date_day + "." + url_date_month + "." + url_date_year
                feeds.append((title, articles))
            else:
                sectors = self.get_sectors(url)
                for subpage in sectors:
                    title = url_date_weekday_pl + " " + url_date_day + "." + url_date_month + "." + url_date_year + " - " + subpage.string
                    url = "http://brewiarz.pl/" + url_date_month_roman + "_" + url_date_year + "/" + url_date_day + url_date_month + "/" + subpage['href']
                    print(url)
                    articles = self.parse_pages(url)
                    if articles:
                        feeds.append((title, articles))
        return feeds

    def get_sectors(self, url):
        # Collect links to the subpages for a given day
        sectors = []
        soup = self.index_to_soup(url)
        sectors_table = soup.find(name='table', attrs={'width': '490'})
        sector_links = sectors_table.findAll(name='a')
        for sector_links_modified in sector_links:
            link_parent_text = sector_links_modified.findParent(name='div').text
            if link_parent_text:
                sector_links_modified.text = link_parent_text.text
            sectors.append(sector_links_modified)
        return sectors

    def parse_pages(self, url):
        current_articles = []
        soup = self.index_to_soup(url)
        www = soup.find(attrs={'class': 'www'})
        if www:
            box_title = www.find(text='Teksty LG')
            article_box_parent = box_title.findParent('ul')
            article_box_sibling = article_box_parent.findNextSibling('ul')
            for li in article_box_sibling.findAll('li'):
                link = li.find(name='a')
                ol = link.findNextSibling(name='ol')
                if ol:
                    sublinks = ol.findAll(name='a')
                    for sublink in sublinks:
                        link_title = self.tag_to_string(link) + " - " + self.tag_to_string(sublink)
                        link_url_print = re.sub('php3', 'php3?kr=_druk&wr=lg&', sublink['href'])
                        link_url = url[:-10] + link_url_print
                        current_articles.append({'title': link_title,
                                                 'url': link_url, 'description': '', 'date': ''})
                else:
                    if link.findParent(name='ol'):
                        continue
                    else:
                        link_title = self.tag_to_string(link)
                        link_url_print = re.sub('php3', 'php3?kr=_druk&wr=lg&', link['href'])
                        link_url = url[:-10] + link_url_print
                        current_articles.append({'title': link_title,
                                                 'url': link_url, 'description': '', 'date': ''})
            return current_articles
        else:
            return None

    def preprocess_html(self, soup):
        footer = soup.find(name='a', attrs={'href': 'http://brewiarz.pl'})
        footer_parent = footer.findParent('div')
        footer_parent.extract()

        header = soup.find(text='http://brewiarz.pl')
        header_parent = header.findParent('div')
        header_parent.extract()

        subheader = soup.find(text='Kolor szat:').findParent('div')
        subheader.extract()

        color = soup.find('b')
        color.extract()

        cleaned = self.strip_tags(soup)

        div = cleaned.findAll(name='div')
        div[1].extract()
        div[2].extract()
        div[3].extract()

        return cleaned

    def strip_tags(self, soup_dirty):
        # Keep only a whitelist of tags, hoisting children of removed tags
        VALID_TAGS = ['p', 'div', 'br', 'b', 'a', 'title', 'head', 'html', 'body']

        for tag in soup_dirty.findAll(True):
            if tag.name not in VALID_TAGS:
                for i, x in enumerate(tag.parent.contents):
                    if x == tag:
                        break
                else:
                    print "Can't find", tag, "in", tag.parent
                    continue
                for r in reversed(tag.contents):
                    tag.parent.insert(i, r)
                tag.extract()
        return soup_dirty

recipes/buchreport.recipe (new file, 45 lines)

@@ -0,0 +1,45 @@
from calibre.web.feeds.recipes import BasicNewsRecipe

'''Calibre recipe to convert the RSS feeds of the Buchreport to an ebook.'''


class Buchreport(BasicNewsRecipe):
    __author__ = 'a.peter'
    __copyright__ = 'a.peter'
    __license__ = 'GPL v3'
    description = 'Buchreport'
    version = 4
    title = u'Buchreport'
    timefmt = ' [%d.%m.%Y]'

    encoding = 'cp1252'
    language = 'de'

    extra_css = 'body { margin-left: 0.00em; margin-right: 0.00em; } \
article, articledate, articledescription { text-align: left; } \
h1 { text-align: left; font-size: 140%; font-weight: bold; } \
h2 { text-align: left; font-size: 100%; font-weight: bold; font-style: italic; } \
h3 { text-align: left; font-size: 100%; font-weight: regular; font-style: italic; } \
h4, h5, h6 { text-align: left; font-size: 100%; font-weight: bold; }'

    oldest_article = 7.0
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False
    publication_type = 'newspaper'

    remove_tags_before = dict(name='h2')
    remove_tags_after = [
        dict(name='div', attrs={'style': ["padding-top:10px;clear:both"]})
    ]
    remove_tags = [
        dict(name='div', attrs={'style': ["padding-top:10px;clear:both"]}),
        dict(name='iframe'),
        dict(name='img')
    ]

    feeds = [
        (u'Buchreport', u'http://www.buchreport.de/index.php?id=5&type=100')
    ]

    def get_masthead_url(self):
        return 'http://www.buchreport.de/fileadmin/template/img/buchreport_logo.jpg'

@@ -1,5 +1,5 @@
__license__ = 'GPL v3'
__copyright__ = '2009-2012, Darko Miletic <darko.miletic at gmail.com>'
'''
www.business-standard.com
'''

@@ -14,10 +14,12 @@ class BusinessStandard(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    auto_cleanup = False
    encoding = 'cp1252'
    publisher = 'Business Standard Limited'
    category = 'news, business, money, india, world'
    language = 'en_IN'
    masthead_url = 'http://feeds.business-standard.com/images/logo_08.jpg'

    conversion_options = {
        'comments': description

@@ -26,7 +28,7 @@ class BusinessStandard(BasicNewsRecipe):
       ,'publisher': publisher
       ,'linearize_tables': True
    }

    #keep_only_tags=[dict(name='td', attrs={'class':'TableClas'})]
    remove_tags = [
        dict(name=['object','link','script','iframe','base','meta'])
       ,dict(attrs={'class':'rightDiv2'})

@@ -45,3 +47,8 @@ class BusinessStandard(BasicNewsRecipe):
       ,(u'Management & Mktg', u'http://feeds.business-standard.com/rss/7_0.xml')
       ,(u'Opinion', u'http://feeds.business-standard.com/rss/5_0.xml')
    ]

    def print_version(self, url):
        # The article id (autono) and the tp value are the last two
        # components of the URL path; the print page takes both as
        # query parameters
        l, s, tp = url.rpartition('/')
        t, k, autono = l.rpartition('/')
        return 'http://www.business-standard.com/india/printpage.php?autono=' + autono + '&tp=' + tp
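
The rpartition calls in print_version above peel the last two path components off the
article URL. A quick sketch with a hypothetical URL (the shape is inferred from the
code, not taken from the site):

    # url = 'http://www.business-standard.com/india/news/some-headline/491234/12'
    # url.rpartition('/') -> (l = '.../some-headline/491234', '/', tp = '12')
    # l.rpartition('/')   -> (t = '.../some-headline', '/', autono = '491234')
    # result: 'http://www.business-standard.com/india/printpage.php?autono=491234&tp=12'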

@@ -0,0 +1,68 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe


class ceskaPoziceRecipe(BasicNewsRecipe):
    __author__ = 'bubak'
    title = u'Česká pozice'
    description = 'Česká pozice'
    oldest_article = 2
    max_articles_per_feed = 20

    feeds = [
        (u'Všechny články', u'http://www.ceskapozice.cz/rss.xml'),
        (u'Domov', u'http://www.ceskapozice.cz/taxonomy/term/16/feed'),
        (u'Chrono', u'http://www.ceskapozice.cz/chrono/feed'),
        (u'Evropa', u'http://www.ceskapozice.cz/taxonomy/term/17/feed')
    ]

    language = 'cs'
    cover_url = 'http://www.ceskapozice.cz/sites/default/files/cpozice_logo.png'
    remove_javascript = True
    no_stylesheets = True
    domain = u'http://www.ceskapozice.cz'
    use_embedded_content = False

    remove_tags = [dict(name='div', attrs={'class': ['block-ad', 'region region-content-ad']}),
                   dict(name='ul', attrs={'class': 'links'}),
                   dict(name='div', attrs={'id': ['comments', 'back-to-top']}),
                   dict(name='div', attrs={'class': ['next-page', 'region region-content-ad']}),
                   dict(name='cite')]
    keep_only_tags = [dict(name='div', attrs={'id': 'content'})]

    visited_urls = {}

    def get_article_url(self, article):
        # The main feed overlaps the topic feeds; skip repeats
        url = BasicNewsRecipe.get_article_url(self, article)
        if url in self.visited_urls:
            self.log.debug('Ignoring duplicate: ' + url)
            return None
        else:
            self.visited_urls[url] = True
            self.log.debug('Accepting: ' + url)
            return url

    def preprocess_html(self, soup):
        self.append_page(soup, soup.body, 3)
        return soup

    def append_page(self, soup, appendtag, position):
        # Follow the pager and inline the remaining pages of the article
        pager = soup.find('div', attrs={'class': 'paging-bottom'})
        if pager:
            nextbutton = pager.find('li', attrs={'class': 'pager-next'})
            if nextbutton:
                nexturl = self.domain + nextbutton.a['href']
                soup2 = self.index_to_soup(nexturl)
                texttag = soup2.find('div', attrs={'class': 'main-body'})
                for it in texttag.findAll('div', attrs={'class': 'region region-content-ad'}):
                    it.extract()
                for it in texttag.findAll('cite'):
                    it.extract()
                newpos = len(texttag.contents)
                self.append_page(soup2, texttag, newpos)
                texttag.extract()
                appendtag.insert(position, texttag)
            pager.extract()

@@ -0,0 +1,30 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe


class ceskenovinyRecipe(BasicNewsRecipe):
    __author__ = 'bubak'
    title = u'České Noviny'
    description = 'ceskenoviny.cz'
    oldest_article = 1
    max_articles_per_feed = 20

    feeds = [
        (u'Domácí', u'http://www.ceskenoviny.cz/sluzby/rss/domov.php')
        #,(u'Hlavní události', u'http://www.ceskenoviny.cz/sluzby/rss/index.php')
        #,(u'Přehled zpráv', u'http://www.ceskenoviny.cz/sluzby/rss/zpravy.php')
        #,(u'Ze světa', u'http://www.ceskenoviny.cz/sluzby/rss/svet.php')
        #,(u'Kultura', u'http://www.ceskenoviny.cz/sluzby/rss/kultura.php')
        #,(u'IT', u'http://www.ceskenoviny.cz/sluzby/rss/pocitace.php')
    ]

    language = 'cs'
    cover_url = 'http://i4.cn.cz/grafika/cn_logo-print.gif'
    remove_javascript = True
    no_stylesheets = True
    remove_attributes = []
    filter_regexps = [r'img.aktualne.centrum.cz']
    keep_only_tags = [dict(name='div', attrs={'id': 'clnk'})]

@@ -0,0 +1,26 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe


class cro6Recipe(BasicNewsRecipe):
    __author__ = 'bubak'
    title = u'Český rozhlas 6'
    description = 'Český rozhlas 6'
    oldest_article = 1
    max_articles_per_feed = 20

    feeds = [
        (u'Český rozhlas 6', u'http://www.rozhlas.cz/export/cro6/')
    ]

    language = 'cs'
    cover_url = 'http://www.rozhlas.cz/img/e5/logo/cro6.png'
    remove_javascript = True
    no_stylesheets = True
    remove_attributes = []
    remove_tags = [dict(name='div', attrs={'class': ['audio-play-all', 'poradHeaders', 'actions']}),
                   dict(name='p', attrs={'class': ['para-last']})]
    keep_only_tags = [dict(name='div', attrs={'id': 'article'})]

recipes/demagog.cz.recipe (new file, 39 lines)

@@ -0,0 +1,39 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
import re


class demagogRecipe(BasicNewsRecipe):
    __author__ = 'bubak'
    title = u'Demagog.cz'
    publisher = u''
    description = 'demagog.cz'
    oldest_article = 6
    max_articles_per_feed = 20
    use_embedded_content = False
    remove_empty_feeds = True

    feeds = [
        (u'Aktuality', u'http://demagog.cz/rss')
    ]

    #encoding = 'iso-8859-2'
    language = 'cs'
    cover_url = 'http://demagog.cz/content/images/demagog.cz.png'
    remove_javascript = True
    no_stylesheets = True
    extra_css = """
        .vyrok_suhrn{margin-top:50px; }
        .vyrok{margin-bottom:30px; }
    """
    remove_tags = [dict(name='a', attrs={'class': 'vyrok_odovodnenie_tgl'}),
                   dict(name='img', attrs={'class': 'vyrok_fotografia'})]
    remove_tags_before = dict(name='h1')
    remove_tags_after = dict(name='div', attrs={'class': 'vyrok_text_after'})
    # Insert a separator before each statement summary; note that a plain
    # '\1' returned from a lambda is not a backreference, so use the
    # matched group explicitly
    preprocess_regexps = [(re.compile(r'(<div class="vyrok_suhrn">)', re.DOTALL|re.IGNORECASE), lambda match: match.group(1) + '<hr>')]

recipes/denik.cz.recipe (new file, 36 lines)

@@ -0,0 +1,36 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe


class ceskyDenikRecipe(BasicNewsRecipe):
    __author__ = 'bubak'
    title = u'denik.cz'
    publisher = u''
    description = u'Český deník'
    oldest_article = 1
    max_articles_per_feed = 20
    use_embedded_content = False
    remove_empty_feeds = True

    feeds = [
        (u'Z domova', u'http://www.denik.cz/rss/z_domova.html')
       ,(u'Pražský deník - Moje Praha', u'http://prazsky.denik.cz/rss/zpravy_region.html')
        #,(u'Zahraničí', u'http://www.denik.cz/rss/ze_sveta.html')
        #,(u'Kultura', u'http://www.denik.cz/rss/kultura.html')
    ]

    #encoding = 'iso-8859-2'
    language = 'cs'
    cover_url = 'http://g.denik.cz/images/loga/denik.png'
    remove_javascript = True
    no_stylesheets = True
    extra_css = """
    """
    remove_tags = []
    keep_only_tags = [dict(name='div', attrs={'class': 'content'})]
    #remove_tags_before = dict(name='h1')
    remove_tags_after = dict(name='p', attrs={'class': 'clanek-autor'})

@@ -0,0 +1,28 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe


class denikReferendumRecipe(BasicNewsRecipe):
    __author__ = 'bubak'
    title = u'Den\u00edk Referendum'
    publisher = u''
    description = ''
    oldest_article = 1
    max_articles_per_feed = 20

    feeds = [
        (u'Deník Referendum', u'http://feeds.feedburner.com/DenikReferendum')
    ]

    #encoding = 'iso-8859-2'
    language = 'cs'
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    remove_attributes = []
    remove_tags_after = dict(name='div', attrs={'class': ['text']})
    remove_tags = [dict(name='div', attrs={'class': ['box boxLine', 'box noprint', 'box']}),
                   dict(name='h3', attrs={'class': 'head alt'})]
    keep_only_tags = [dict(name='div', attrs={'id': ['content']})]

@@ -6,7 +6,6 @@ class Dobreprogramy_pl(BasicNewsRecipe):
    __author__ = 'fenuks'
    __licence__ = 'GPL v3'
    category = 'IT'
    language = 'pl'
    masthead_url = 'http://static.dpcdn.pl/css/Black/Images/header_logo_napis_fullVersion.png'
    cover_url = 'http://userlogos.org/files/logos/Karmody/dobreprogramy_01.png'
    description = u'Aktualności i blogi z dobreprogramy.pl'

@@ -7,6 +7,7 @@ class AdvancedUserRecipe1332847053(BasicNewsRecipe):
    title = u'Editoriali'
    __author__ = 'faber1971'
    description = 'Leading articles on Italy by the best Italian editorials'
    language = 'it'
    oldest_article = 1
    max_articles_per_feed = 100

recipes/f1_ultra.recipe (new file, 35 lines)

@@ -0,0 +1,35 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re


class f1ultra(BasicNewsRecipe):
    title = u'Formuła 1 - F1 ultra'
    __license__ = 'GPL v3'
    __author__ = 'MrStefan <mrstefaan@gmail.com>, Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
    description = u'Formuła 1, Robert Kubica, F3, GP2 oraz inne serie wyścigowe.'
    masthead_url = 'http://www.f1ultra.pl/templates/f1ultra/images/logo.gif'
    remove_empty_feeds = True
    oldest_article = 1
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True

    keep_only_tags = [dict(name='div', attrs={'id': 'main'})]
    remove_tags_after = [dict(attrs={'style': 'margin-top:5px;margin-bottom:5px;display: inline;'})]
    remove_tags = [dict(attrs={'class': ['buttonheading', 'avPlayerContainer', 'createdate']})]
    remove_tags.append(dict(attrs={'title': ['PDF', 'Drukuj', 'Email']}))
    remove_tags.append(dict(name='form', attrs={'method': 'post'}))
    remove_tags.append(dict(name='hr', attrs={'size': '2'}))

    preprocess_regexps = [(re.compile(r'align="left"'), lambda match: ''),
                          (re.compile(r'align="right"'), lambda match: ''),
                          (re.compile(r'width=\"*\"'), lambda match: ''),
                          (re.compile(r'\<table .*?\>'), lambda match: '')]

    extra_css = '''.contentheading { font-size: 1.4em; font-weight: bold; }
                   img { display: block; clear: both;}
                '''
    remove_attributes = ['width', 'height', 'position', 'float', 'padding-left', 'padding-right', 'padding', 'text-align']

    feeds = [(u'F1 Ultra', u'http://www.f1ultra.pl/index.php?option=com_rd_rss&id=1&Itemid=245')]

@@ -8,6 +8,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1349086293(BasicNewsRecipe):
    title = u'Foreign Policy'
    language = 'en'
    __author__ = 'Darko Miletic'
    description = 'International News'
    publisher = 'Washingtonpost.Newsweek Interactive, LLC'

@@ -1,39 +1,88 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2010-2012, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
fronda.pl
'''

from calibre.web.feeds.news import BasicNewsRecipe
import re
from datetime import timedelta, date


class Fronda(BasicNewsRecipe):
    title = u'Fronda.pl'
    publisher = u'Fronda.pl'
    description = u'Portal po\u015bwi\u0119cony - Informacje'
    language = 'pl'
    __author__ = u'Tomasz D\u0142ugosz'
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True

    extra_css = '''
        h1 {font-size:150%}
        .body {text-align:left;}
        div.headline {font-weight:bold}
    '''

    earliest_date = date.today() - timedelta(days=oldest_article)

    def date_cut(self, datestr):
        # True for articles older than oldest_article days,
        # e.g. for a date string like '5.11.2012, 12:07'
        timestamp = datestr.split(',')[0]
        parts = timestamp.split('.')
        art_date = date(int(parts[2]), int(parts[1]), int(parts[0]))
        return art_date < self.earliest_date

    preprocess_regexps = [
        (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
        [(r'<p><a href="http://fronda.pl/sklepy">.*</a></p>', lambda match: ''),
         (r'<p><a href="http://fronda.pl/pasaz">.*</a></p>', lambda match: ''),
         (r'<h3><strong>W.* lektury.*</a></p></div>', lambda match: '</div>'),
         (r'<h3>Zobacz t.*?</div>', lambda match: '</div>'),
         (r'<p[^>]*>&nbsp;</p>', lambda match: ''),
         (r'<p><span style=".*?"><br /></span></p> ', lambda match: ''),
         (r'<a style=\'float:right;margin-top:3px;\' href="http://www.facebook.com/share.php?.*?</a>', lambda match: '')]
    ]

    def parse_index(self):
        genres = [
            ('ekonomia,4.html', 'Ekonomia'),
            ('filozofia,15.html', 'Filozofia'),
            ('historia,6.html', 'Historia'),
            ('kosciol,8.html', 'Kościół'),
            ('kultura,5.html', 'Kultura'),
            ('media,10.html', 'Media'),
            ('nauka,9.html', 'Nauka'),
            ('polityka,11.html', 'Polityka'),
            ('polska,12.html', 'Polska'),
            ('prolife,3.html', 'Prolife'),
            ('religia,7.html', 'Religia'),
            ('rodzina,13.html', 'Rodzina'),
            ('swiat,14.html', 'Świat'),
            ('wydarzenie,16.html', 'Wydarzenie')
        ]
        feeds = []
        articles = {}

        for url, genName in genres:
            soup = self.index_to_soup('http://www.fronda.pl/c/' + url)
            articles[genName] = []
            for item in soup.findAll('li'):
                article_h = item.find('h2')
                if not article_h:
                    continue
                article_date = self.tag_to_string(item.find('b'))
                if self.date_cut(article_date):
                    continue
                article_a = article_h.find('a')
                article_url = 'http://www.fronda.pl' + article_a['href']
                article_title = self.tag_to_string(article_a)
                articles[genName].append({'title': article_title, 'url': article_url, 'date': article_date})
            feeds.append((genName, articles[genName]))
        return feeds

    keep_only_tags = [
        dict(name='div', attrs={'class': 'yui-g'})
    ]

    remove_tags = [
        dict(name='div', attrs={'class': ['related-articles',
                                          'button right',
                                          'pagination']}),
        dict(name='h3', attrs={'class': 'block-header article comments'}),
        dict(name='ul', attrs={'class': 'comment-list'}),
        dict(name='ul', attrs={'class': 'category'}),
        dict(name='p', attrs={'id': 'comments-disclaimer'}),
        dict(name='div', attrs={'id': 'comment-form'})
    ]

@@ -0,0 +1,102 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = 'teepel <teepel44@gmail.com> based on GW from fenuks'

'''
krakow.gazeta.pl
'''

from calibre.web.feeds.news import BasicNewsRecipe


class gw_krakow(BasicNewsRecipe):
    title = u'Gazeta.pl Kraków'
    __author__ = 'teepel <teepel44@gmail.com> based on GW from fenuks'
    language = 'pl'
    description = u'Wiadomości z Krakowa na portalu Gazeta.pl.'
    category = 'newspaper'
    publication_type = 'newspaper'
    masthead_url = 'http://bi.gazeta.pl/im/5/8528/m8528105.gif'
    INDEX = 'http://krakow.gazeta.pl/'
    remove_empty_feeds = True
    oldest_article = 1
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True

    keep_only_tags = []
    keep_only_tags.append(dict(name='div', attrs={'id': 'gazeta_article'}))

    remove_tags = []
    remove_tags.append(dict(name='div', attrs={'id': 'gazeta_article_likes'}))
    remove_tags.append(dict(name='div', attrs={'id': 'gazeta_article_tools'}))
    remove_tags.append(dict(name='div', attrs={'id': 'rel'}))
    remove_tags.append(dict(name='div', attrs={'id': 'gazeta_article_share'}))
    remove_tags.append(dict(name='u1', attrs={'id': 'articleToolbar'}))
    remove_tags.append(dict(name='li', attrs={'class': 'atComments'}))
    remove_tags.append(dict(name='li', attrs={'class': 'atLicense'}))
    remove_tags.append(dict(name='div', attrs={'id': 'banP4'}))
    remove_tags.append(dict(name='div', attrs={'id': 'article_toolbar'}))
    remove_tags.append(dict(name='div', attrs={'id': 'gazeta_article_tags'}))
    remove_tags.append(dict(name='p', attrs={'class': 'txt_upl'}))
    remove_tags.append(dict(name='div', attrs={'class': 'gazeta_article_related_new'}))
    remove_tags.append(dict(name='div', attrs={'class': 'gazetaVideoPlayer'}))
    remove_tags.append(dict(name='div', attrs={'id': 'gazeta_article_miniatures'}))
    remove_tags.append(dict(name='div', attrs={'id': 'gazeta_article_buttons'}))

    remove_tags_after = [dict(name='div', attrs={'id': 'gazeta_article_share'})]

    feeds = [(u'Wiadomości', u'http://rss.gazeta.pl/pub/rss/krakow.xml')]

    def skip_ad_pages(self, soup):
        # Interstitial ad pages link through to the article via a 'btn' anchor
        tag = soup.find(name='a', attrs={'class': 'btn'})
        if tag:
            new_soup = self.index_to_soup(tag['href'], raw=True)
            return new_soup

    def append_page(self, soup, appendtag):
        # Inline all remaining pages of a multi-page article
        loop = False
        tag = soup.find('div', attrs={'id': 'Str'})
        if appendtag.find('div', attrs={'id': 'Str'}):
            nexturl = tag.findAll('a')
            appendtag.find('div', attrs={'id': 'Str'}).extract()
            loop = True
        if appendtag.find(id='source'):
            appendtag.find(id='source').extract()
        while loop:
            loop = False
            for link in nexturl:
                if u'następne' in link.string:
                    url = self.INDEX + link['href']
                    soup2 = self.index_to_soup(url)
                    pagetext = soup2.find(id='artykul')
                    pos = len(appendtag.contents)
                    appendtag.insert(pos, pagetext)
                    tag = soup2.find('div', attrs={'id': 'Str'})
                    nexturl = tag.findAll('a')
                    loop = True

    def gallery_article(self, appendtag):
        # Walk a photo gallery, appending each page's content
        tag = appendtag.find(id='container_gal')
        if tag:
            nexturl = appendtag.find(id='gal_btn_next').a['href']
            appendtag.find(id='gal_navi').extract()
            while nexturl:
                soup2 = self.index_to_soup(nexturl)
                pagetext = soup2.find(id='container_gal')
                nexturl = pagetext.find(id='gal_btn_next')
                if nexturl:
                    nexturl = nexturl.a['href']
                pos = len(appendtag.contents)
                appendtag.insert(pos, pagetext)
            rem = appendtag.find(id='gal_navi')
            if rem:
                rem.extract()

    def preprocess_html(self, soup):
        self.append_page(soup, soup.body)
        if soup.find(id='container_gal'):
            self.gallery_article(soup.body)
        return soup

@@ -0,0 +1,99 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'teepel <teepel44@gmail.com> based on GW from fenuks'

'''
warszawa.gazeta.pl
'''

from calibre.web.feeds.news import BasicNewsRecipe


class gw_wawa(BasicNewsRecipe):
    title = u'Gazeta.pl Warszawa'
    __author__ = 'teepel <teepel44@gmail.com> based on GW from fenuks'
    language = 'pl'
    description = 'Wiadomości z Warszawy na portalu Gazeta.pl.'
    category = 'newspaper'
    publication_type = 'newspaper'
    masthead_url = 'http://bi.gazeta.pl/im/3/4089/m4089863.gif'
    INDEX = 'http://warszawa.gazeta.pl/'
    remove_empty_feeds = True
    oldest_article = 1
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True

    keep_only_tags = []
    keep_only_tags.append(dict(name='div', attrs={'id': 'gazeta_article'}))

    remove_tags = []
    remove_tags.append(dict(name='div', attrs={'id': 'gazeta_article_likes'}))
    remove_tags.append(dict(name='div', attrs={'id': 'gazeta_article_tools'}))
    remove_tags.append(dict(name='div', attrs={'id': 'rel'}))
    remove_tags.append(dict(name='div', attrs={'id': 'gazeta_article_share'}))
    remove_tags.append(dict(name='u1', attrs={'id': 'articleToolbar'}))
    remove_tags.append(dict(name='li', attrs={'class': 'atComments'}))
    remove_tags.append(dict(name='li', attrs={'class': 'atLicense'}))
    remove_tags.append(dict(name='div', attrs={'id': 'banP4'}))
    remove_tags.append(dict(name='div', attrs={'id': 'article_toolbar'}))
    remove_tags.append(dict(name='div', attrs={'id': 'gazeta_article_tags'}))
    remove_tags.append(dict(name='p', attrs={'class': 'txt_upl'}))
    remove_tags.append(dict(name='div', attrs={'class': 'gazeta_article_related_new'}))
    remove_tags.append(dict(name='div', attrs={'class': 'gazetaVideoPlayer'}))
    remove_tags.append(dict(name='div', attrs={'id': 'gazeta_article_miniatures'}))

    feeds = [(u'Wiadomości', u'http://rss.gazeta.pl/pub/rss/warszawa.xml')]

    def skip_ad_pages(self, soup):
        # Interstitial ad pages link through to the article via a 'btn' anchor
        tag = soup.find(name='a', attrs={'class': 'btn'})
        if tag:
            new_soup = self.index_to_soup(tag['href'], raw=True)
            return new_soup

    def append_page(self, soup, appendtag):
        # Inline all remaining pages of a multi-page article
        loop = False
        tag = soup.find('div', attrs={'id': 'Str'})
        if appendtag.find('div', attrs={'id': 'Str'}):
            nexturl = tag.findAll('a')
            appendtag.find('div', attrs={'id': 'Str'}).extract()
            loop = True
        if appendtag.find(id='source'):
            appendtag.find(id='source').extract()
        while loop:
            loop = False
            for link in nexturl:
                if u'następne' in link.string:
                    url = self.INDEX + link['href']
                    soup2 = self.index_to_soup(url)
                    pagetext = soup2.find(id='artykul')
                    pos = len(appendtag.contents)
                    appendtag.insert(pos, pagetext)
                    tag = soup2.find('div', attrs={'id': 'Str'})
                    nexturl = tag.findAll('a')
                    loop = True

    def gallery_article(self, appendtag):
        # Walk a photo gallery, appending each page's content
        tag = appendtag.find(id='container_gal')
        if tag:
            nexturl = appendtag.find(id='gal_btn_next').a['href']
            appendtag.find(id='gal_navi').extract()
            while nexturl:
                soup2 = self.index_to_soup(nexturl)
                pagetext = soup2.find(id='container_gal')
                nexturl = pagetext.find(id='gal_btn_next')
                if nexturl:
                    nexturl = nexturl.a['href']
                pos = len(appendtag.contents)
                appendtag.insert(pos, pagetext)
            rem = appendtag.find(id='gal_navi')
            if rem:
                rem.extract()

    def preprocess_html(self, soup):
        self.append_page(soup, soup.body)
        if soup.find(id='container_gal'):
            self.gallery_article(soup.body)
        return soup

@@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Gazeta_Wyborcza(BasicNewsRecipe):
    title = u'Gazeta.pl'
    __author__ = 'fenuks, Artur Stachecki'
    language = 'pl'
    description = 'news from gazeta.pl'

recipes/house_news.recipe (new file, 30 lines)

@@ -0,0 +1,30 @@
__license__ = 'GPL v3'
__copyright__ = '2012, Eddie Lau'

from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipeHouseNews(BasicNewsRecipe):
    title = u'House News \u4e3b\u5834\u65b0\u805e'
    __author__ = 'Eddie Lau'
    publisher = 'House News'
    oldest_article = 1
    max_articles_per_feed = 100
    auto_cleanup = False
    language = 'zh'
    encoding = 'utf-8'
    description = 'http://thehousenews.com'
    category = 'Chinese, Blogs, Opinion, News, Hong Kong'
    masthead_url = 'http://thehousenews.com/static/images/housebeta.jpg'
    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} p[class=date] {font-size:50%;} div[class=author] {font-size:75%;} p[class=caption] {font-size:50%;}'

    feeds = [(u'Latest', u'http://thehousenews.com/rss/')]

    keep_only_tags = [dict(name='h1'),
                      dict(name='div', attrs={'class': ['photo']}),
                      dict(name='p', attrs={'class': 'caption'}),
                      dict(name='div', attrs={'class': 'articleTextWrap'}),
                      dict(name='div', attrs={'class': ['author']}),
                      dict(name='p', attrs={'class': 'date'})]

    def populate_article_metadata(self, article, soup, first):
        # Use the first image in the article as the TOC thumbnail
        if first and hasattr(self, 'add_toc_thumbnail'):
            picdiv = soup.find('img')
            if picdiv is not None:
                self.add_toc_thumbnail(article, picdiv['src'])

New and updated recipe icons (binary files, contents not shown in this view):
recipes/icons/antyweb.png, recipes/icons/app_funds.png, recipes/icons/autosport.png,
recipes/icons/blognexto.png, recipes/icons/brewiarz.png, recipes/icons/f1_ultra.png,
recipes/icons/kp.png, recipes/icons/satkurier.png, recipes/icons/wprost.png, plus
several other new or changed icons whose filenames are not shown.

recipes/ihned.cz.recipe (new file, 36 lines)

@@ -0,0 +1,36 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe


class ihnedRecipe(BasicNewsRecipe):
    __author__ = 'bubak'
    title = u'iHNed.cz'
    publisher = u''
    description = 'ihned.cz'
    oldest_article = 1
    max_articles_per_feed = 20
    use_embedded_content = False

    feeds = [
        (u'Zprávy', u'http://zpravy.ihned.cz/?m=rss'),
        (u'Hospodářské noviny', u'http://hn.ihned.cz/?p=500000_rss'),
        (u'Byznys', u'http://byznys.ihned.cz/?m=rss'),
        (u'Life', u'http://life.ihned.cz/?m=rss'),
        (u'Dialog', u'http://dialog.ihned.cz/?m=rss')
    ]

    #encoding = 'iso-8859-2'
    language = 'cs'
    cover_url = 'http://rss.ihned.cz/img/0/0_hp09/ihned.cz.gif'
    remove_javascript = True
    no_stylesheets = True
    extra_css = """
    """
    remove_attributes = []
    remove_tags_before = dict(name='div', attrs={'id': ['heading']})
    remove_tags_after = dict(name='div', attrs={'id': ['next-authors']})
    remove_tags = [dict(name='ul', attrs={'id': ['comm']}),
                   dict(name='div', attrs={'id': ['r-big']}),
                   dict(name='div', attrs={'class': ['tools tools-top']})]

recipes/insider.recipe (new file, 59 lines)

@@ -0,0 +1,59 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
import re

from calibre.web.feeds.news import BasicNewsRecipe


class insider(BasicNewsRecipe):
    __author__ = 'bubak'
    title = 'Insider'
    language = 'cs'

    remove_tags = [dict(name='div', attrs={'class': 'article-related-content'})
                  ,dict(name='div', attrs={'class': 'calendar'})
                  ,dict(name='span', attrs={'id': 'labelHolder'})
                  ]
    no_stylesheets = True
    keep_only_tags = [dict(name='div', attrs={'class': ['doubleBlock textContentFormat']})]
    preprocess_regexps = [(re.compile(r'T.mata:.*', re.DOTALL|re.IGNORECASE), lambda m: '</body>')]

    needs_subscription = True

    def get_browser(self):
        br = BasicNewsRecipe.get_browser(self)
        br.open('http://www.denikinsider.cz/')
        br.select_form(nr=0)
        br['login-name'] = self.username
        br['login-password'] = self.password
        res = br.submit()
        # Decode so the unicode membership test below works reliably
        raw = res.read().decode('utf-8', 'replace')
        if u'Odhlásit se' not in raw:
            raise ValueError('Failed to login to insider.cz. '
                             'Check your username and password.')
        return br

    def parse_index(self):
        soup = self.index_to_soup('http://www.denikinsider.cz')
        titles = soup.findAll('span', attrs={'class': 'homepageArticleTitle'})
        if titles is None:
            raise ValueError('Could not find category content')

        articles = []
        seen_titles = set([])
        for title in titles:
            if title.string in seen_titles:
                continue
            article = title.parent
            seen_titles.add(title.string)
            url = article['href']
            if url.startswith('/'):
                url = 'http://www.denikinsider.cz/' + url
            self.log('\tFound article:', title.string, 'at', url)
            articles.append({'title': title.string, 'url': url, 'description': '',
                             'date': ''})
        return [(self.title, articles)]

recipes/kerrang.recipe (new file, 34 lines)

@ -0,0 +1,34 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
from calibre.web.feeds.news import BasicNewsRecipe
class kerrang(BasicNewsRecipe):
title = u'Kerrang!'
__author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
language = 'en_GB'
description = u'UK-based magazine devoted to rock music published by Bauer Media Group'
oldest_article = 7
masthead_url = 'http://images.kerrang.com/design/kerrang/kerrangsite/logo.gif'
max_articles_per_feed = 100
simultaneous_downloads = 5
remove_javascript = True
no_stylesheets = True
use_embedded_content = False
recursions = 0
keep_only_tags = []
keep_only_tags.append(dict(attrs = {'class' : ['headz', 'blktxt']}))
extra_css = ''' img { display: block; margin-right: auto;}
h1 {text-align: left; font-size: 22px;}'''
feeds = [(u'News', u'http://www.kerrang.com/blog/rss.xml')]
def preprocess_html(self, soup):
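        # flatten hyperlinks: keep the anchor text, drop the <a> wrapper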
for alink in soup.findAll('a'):
if alink.string is not None:
tstr = alink.string
alink.replaceWith(tstr)
return soup

recipes/kp.recipe Normal file
@ -0,0 +1,52 @@
from calibre.web.feeds.news import BasicNewsRecipe
class KrytykaPolitycznaRecipe(BasicNewsRecipe):
__license__ = 'GPL v3'
__author__ = u'intromatyk <intromatyk@gmail.com>'
language = 'pl'
version = 1
title = u'Krytyka Polityczna'
category = u'News'
description = u' Lewicowe pismo zaangażowane w bieg spraw publicznych w Polsce.'
cover_url=''
remove_empty_feeds= True
no_stylesheets=True
oldest_article = 7
max_articles_per_feed = 100000
recursions = 0
remove_javascript = True
simultaneous_downloads = 3
keep_only_tags =[]
keep_only_tags.append(dict(name = 'h1', attrs = {'class' : 'print-title'}))
keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'print-content'}))
remove_tags =[]
remove_tags.append(dict(attrs = {'class' : ['field field-type-text field-field-story-switch', 'field field-type-filefield field-field-story-temp' , 'field field-type-text field-field-story-author', 'field field-type-text field-field-story-lead-switch']}))
extra_css = '''
body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
td.contentheading{font-size: large; font-weight: bold;}
'''
feeds = [
('Wszystkie', 'http://www.krytykapolityczna.pl/rss.xml')
]
def print_version(self, url):
soup = self.index_to_soup(url)
print_ico = soup.find(attrs = {'class' : 'print-page'})
print_uri = print_ico['href']
self.log('PRINT', print_uri)
return 'http://www.krytykapolityczna.pl/' + print_uri
def preprocess_html(self, soup):
for alink in soup.findAll('a'):
if alink.string is not None:
tstr = alink.string
alink.replaceWith(tstr)
return soup

@ -0,0 +1,32 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
class kudyznudyRecipe(BasicNewsRecipe):
__author__ = 'bubak'
title = u'Kudy z nudy'
publisher = u''
description = 'kudyznudy.cz'
oldest_article = 3
max_articles_per_feed = 20
use_embedded_content = False
feeds = [
(u'Praha nejnovější', u'http://www.kudyznudy.cz/RSS/Charts.aspx?Type=Newest&Lang=cs-CZ&RegionId=1')
]
#encoding = 'iso-8859-2'
language = 'cs'
cover_url = 'http://www.kudyznudy.cz/App_Themes/KzN/Images/Containers/Header/HeaderLogoKZN.png'
remove_javascript = True
no_stylesheets = True
extra_css = """
"""
remove_attributes = []
remove_tags_before = dict(name='div', attrs={'class':['C_WholeContentPadding']})
remove_tags_after = dict(name='div', attrs={'class':['SurroundingsContainer']})
remove_tags = [dict(name='div', attrs={'class':['Details', 'buttons', 'SurroundingsContainer', 'breadcrumb']})]
keep_only_tags = []

recipes/lequipe.recipe Normal file
@ -0,0 +1,45 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
from calibre.web.feeds.news import BasicNewsRecipe
class leequipe(BasicNewsRecipe):
title = u'l\'equipe'
__author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
language = 'fr'
description = u'Retrouvez tout le sport en direct sur le site de L\'EQUIPE et suivez l\'actualité du football, rugby, basket, cyclisme, f1, volley, hand, tous les résultats sportifs'
oldest_article = 1
masthead_url = 'http://static.lequipe.fr/v6/img/logo-lequipe.png'
max_articles_per_feed = 100
simultaneous_downloads = 5
remove_javascript = True
no_stylesheets = True
use_embedded_content = False
recursions = 0
keep_only_tags = []
keep_only_tags.append(dict(attrs={'id': ['article']}))
remove_tags = []
remove_tags.append(dict(attrs={'id': ['partage', 'ensavoirplus', 'bloc_bas_breve', 'commentaires', 'tools']}))
remove_tags.append(dict(attrs={'class': ['partage_bis', 'date']}))
feeds = [(u'Football', u'http://www.lequipe.fr/rss/actu_rss_Football.xml'),
(u'Auto-Moto', u'http://www.lequipe.fr/rss/actu_rss_Auto-Moto.xml'),
(u'Tennis', u'http://www.lequipe.fr/rss/actu_rss_Tennis.xml'),
(u'Golf', u'http://www.lequipe.fr/rss/actu_rss_Golf.xml'),
(u'Rugby', u'http://www.lequipe.fr/rss/actu_rss_Rugby.xml'),
(u'Basket', u'http://www.lequipe.fr/rss/actu_rss_Basket.xml'),
(u'Hand', u'http://www.lequipe.fr/rss/actu_rss_Hand.xml'),
(u'Cyclisme', u'http://www.lequipe.fr/rss/actu_rss_Cyclisme.xml'),
(u'Autres Sports', u'http://pipes.yahoo.com/pipes/pipe.run?_id=2039f7f4f350c70c5e4e8633aa1b37cd&_render=rss')
]
def preprocess_html(self, soup):
for alink in soup.findAll('a'):
if alink.string is not None:
tstr = alink.string
alink.replaceWith(tstr)
return soup

recipes/lidovky.recipe Normal file
@ -0,0 +1,40 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
import re
class lnRecipe(BasicNewsRecipe):
__author__ = 'bubak'
title = u'lidovky'
publisher = u''
description = 'lidovky.cz'
oldest_article = 1
max_articles_per_feed = 20
feeds = [
(u'Události', u'http://www.lidovky.cz/export/rss.asp?r=ln_domov'),
(u'Svět', u'http://www.lidovky.cz/export/rss.asp?r=ln_zahranici'),
(u'Byznys', u'http://www.lidovky.cz/export/rss.asp?c=ln_byznys'),
(u'Věda', u'http://www.lidovky.cz/export/rss.asp?r=ln_veda'),
(u'Názory', u'http://www.lidovky.cz/export/rss.asp?r=ln_nazory'),
(u'Relax', u'http://www.lidovky.cz/export/rss.asp?c=ln_relax')
]
#encoding = 'iso-8859-2'
language = 'cs'
cover_url = 'http://g.lidovky.cz/o/lidovky_ln3b/lidovky-logo.png'
remove_javascript = True
no_stylesheets = True
use_embedded_content = False
remove_attributes = []
remove_tags_before = dict(name='div', attrs={'id':['content']})
remove_tags_after = dict(name='div', attrs={'class':['authors']})
preprocess_regexps = [(re.compile(r'<div id="(fb-root)".*', re.DOTALL|re.IGNORECASE), lambda match: '</body>')]
keep_only_tags = []

@ -0,0 +1,36 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'teepel <teepel44@gmail.com>'
'''
http://www.mateusz.pl/czytania
'''
from calibre.web.feeds.news import BasicNewsRecipe
class czytania_mateusz(BasicNewsRecipe):
title = u'Czytania na ka\u017cdy dzie\u0144'
__author__ = 'teepel <teepel44@gmail.com>'
description = u'Codzienne czytania z jednego z najstarszych polskich serwisów katolickich.'
language = 'pl'
INDEX='http://www.mateusz.pl/czytania'
oldest_article = 1
remove_empty_feeds= True
no_stylesheets=True
auto_cleanup = True
remove_javascript = True
simultaneous_downloads = 2
max_articles_per_feed = 100
feeds = [(u'Czytania', u'http://mateusz.pl/rss/czytania/')]
remove_tags =[]
remove_tags.append(dict(name = 'p', attrs = {'class' : 'top'}))
#thanks t3d
def get_article_url(self, article):
link = article.get('link')
if 'kmt.pl' not in link:
return link

@ -0,0 +1,29 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
class metropolRecipe(BasicNewsRecipe):
__author__ = 'bubak'
title = u'Metropol TV'
publisher = u''
description = 'metropol.cz'
oldest_article = 1
max_articles_per_feed = 20
use_embedded_content = False
feeds = [
(u'Metropolcv.cz', u'http://www.metropol.cz/rss/')
]
#encoding = 'iso-8859-2'
language = 'cs'
cover_url = 'http://www.metropol.cz/public/css/../images/logo/metropoltv.png'
remove_javascript = True
no_stylesheets = True
extra_css = """
"""
remove_attributes = []
keep_only_tags = [dict(name='div', attrs={'id':['art-full']})]

recipes/myapple_pl.recipe Normal file
@ -0,0 +1,49 @@
from calibre.web.feeds.news import BasicNewsRecipe
class MyAppleRecipe(BasicNewsRecipe):
__license__ = 'GPL v3'
__author__ = u'Artur Stachecki <artur.stachecki@gmail.com>'
language = 'pl'
version = 1
title = u'MyApple.pl'
category = u'News'
description = u' Największy w Polsce serwis zajmujący się tematyką związaną z Apple i wszelkimi produktami tej firmy.'
cover_url=''
remove_empty_feeds= True
no_stylesheets=True
oldest_article = 7
max_articles_per_feed = 100000
recursions = 0
remove_javascript = True
simultaneous_downloads = 3
keep_only_tags =[]
keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'article_content'}))
remove_tags =[]
remove_tags.append(dict(name = 'div', attrs = {'class' : 'article_author_date_comment_container'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'fullwidth'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'cmslinks'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'googleads-468'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'comments'}))
extra_css = '''
body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
td.contentheading{font-size: large; font-weight: bold;}
'''
feeds = [
('News', 'feed://myapple.pl/external.php?do=rss&type=newcontent&sectionid=1&days=120&count=10'),
]
def preprocess_html(self, soup):
for alink in soup.findAll('a'):
if alink.string is not None:
tstr = alink.string
alink.replaceWith(tstr)
return soup

@ -0,0 +1,30 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
class nfpkRecipe(BasicNewsRecipe):
__author__ = 'bubak'
title = u'Nadační fond proti korupci'
publisher = u''
description = 'nfpk.cz'
oldest_article = 7
max_articles_per_feed = 20
use_embedded_content = False
remove_empty_feeds = True
feeds = [
(u'Aktuality', u'http://feeds.feedburner.com/nfpk')
]
#encoding = 'iso-8859-2'
language = 'cs'
cover_url = 'http://www.nfpk.cz/_templates/nfpk/_images/logo.gif'
remove_javascript = True
no_stylesheets = True
extra_css = """
"""
remove_attributes = []
keep_only_tags = [dict(name='div', attrs={'id':'content'})]

@ -0,0 +1,61 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
from calibre.web.feeds.news import BasicNewsRecipe
class naszdziennik(BasicNewsRecipe):
title = u'Nasz Dziennik'
__author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
language = 'pl'
description =u'Nasz Dziennik - Ogólnopolska gazeta codzienna. Podejmuje tematykę dotyczącą życia społecznego, kulturalnego, politycznego i religijnego. Propaguje wartości chrześcijańskie oraz tradycję i kulturę polską.'
masthead_url='http://www.naszdziennik.pl/images/logo-male.png'
max_articles_per_feed = 100
remove_javascript=True
no_stylesheets = True
keep_only_tags =[dict(attrs = {'id' : 'article'})]
    #we define a new function; it must return the list of feeds together with their articles
    def parse_index(self):
        #the address whose articles we parse
        soup = self.index_to_soup('http://www.naszdziennik.pl/news')
        #declare an empty list of feeds
        feeds = []
        #declare an empty dictionary of articles
        articles = {}
        #declare an empty list of sections
        sections = []
        #initialise the first section as an empty string
        section = ''
        #a for loop that examines every "news-article" tag in turn
        for item in soup.findAll(attrs = {'class' : 'news-article'}) :
            #inside the "news-article" tag we look for the first h4 tag
            section = item.find('h4')
            #assign the tag's text content to the section variable
            section = self.tag_to_string(section)
            #check whether the articles dictionary already has a key for this section
            #if it does not:
            if section not in articles :
                #add the new section to the list of sections
                sections.append(section)
                #declare the new section in the articles dictionary, keyed by the section name, with an empty list as its value
                articles[section] = []
            #find the next "title-datetime" tag
            article_title_datetime = item.find(attrs = {'class' : 'title-datetime'})
            #inside title-datetime find the first link
            article_a = article_title_datetime.find('a')
            #and build an absolute link to the actual article from it
            article_url = 'http://naszdziennik.pl' + article_a['href']
            #the text between the <a> tags is used as the title
            article_title = self.tag_to_string(article_a)
            #and the date is the text of the first h4 tag found inside title-datetime
            article_date = self.tag_to_string(article_title_datetime.find('h4'))
            #append the collected elements to the article list declared above
            articles[section].append( { 'title' : article_title, 'url' : article_url, 'date' : article_date })
        #once all articles have been added, append each section to the feeds list, using the section names collected above
        for section in sections:
            feeds.append((section, articles[section]))
        #return the list of feeds; calibre takes care of parsing them
        return feeds
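
The commented walkthrough above builds exactly the structure calibre expects from parse_index: a list of (section title, list of article dicts) pairs. A standalone sketch of just that contract, with made-up feed data:

from calibre.web.feeds.news import BasicNewsRecipe

class MinimalIndexSketch(BasicNewsRecipe):
    title = 'parse_index example'

    def parse_index(self):
        articles = [{'title': 'Example article',      # shown in the book TOC
                     'url': 'http://example.com/a1',  # page calibre downloads
                     'description': '',               # optional summary
                     'date': ''}]                     # optional date string
        # one (section name, article list) pair per generated feed
        return [('Example section', articles)]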

@ -0,0 +1,56 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
'''
Fetch Népszabadság
'''
from calibre.web.feeds.news import BasicNewsRecipe
class nepszabadsag(BasicNewsRecipe):
title = u'N\u00e9pszabads\u00e1g'
description = ''
__author__ = 'bubak'
use_embedded_content = False
timefmt = ' [%d %b %Y]'
oldest_article = 2
max_articles_per_feed = 20
no_stylesheets = True
language = 'hu'
#delay = 1
#timeout = 10
simultaneous_downloads = 5
#encoding = 'utf-8'
remove_javascript = True
cover_url = 'http://nol.hu/_design/image/logo_nol_live.jpg'
feeds = [
(u'Belföld', u'http://nol.hu/feed/belfold.rss')
#,(u'Külföld', u'http://nol.hu/feed/kulfold.rss')
#,(u'Gazdaság', u'http://nol.hu/feed/gazdasag.rss')
#,(u'Kultúra', u'http://nol.hu/feed/kult.rss')
]
extra_css = '''
'''
remove_attributes = []
remove_tags_before = dict(name='div', attrs={'class':['d-source']})
remove_tags_after = dict(name='div', attrs={'class':['tags']})
remove_tags = [dict(name='div', attrs={'class':['h']}),
dict(name='tfoot')]
keep_only_tags = [dict(name='table', attrs={'class':'article-box'})]
    # NS sometimes serves an ad page, but not frequently enough to test
    # against; disabled for now via the AA name prefix, TBD
    def AAskip_ad_pages(self, soup):
if ('advertisement' in soup.find('title').string.lower()):
href = soup.find('a').get('href')
self.log.debug('Skipping to: ' + href)
new = self.browser.open(href).read().decode('utf-8', 'ignore')
#ipython(locals())
self.log.debug('Finished: ' + href)
return new
else:
return None
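
The disabled method above targets calibre's skip_ad_pages hook: return replacement raw HTML to skip past an interstitial page, or None to keep the page as downloaded. A guarded sketch of the same idea, assuming that contract (the 'advertisement' title marker is the author's heuristic):

from calibre.web.feeds.news import BasicNewsRecipe

class SkipAdSketch(BasicNewsRecipe):
    title = 'skip_ad_pages example'

    def skip_ad_pages(self, soup):
        title = soup.find('title')
        if title is not None and 'advertisement' in (title.string or '').lower():
            target = soup.find('a')
            if target is not None:
                # fetch the real article the ad page points to
                return self.browser.open(target['href']).read().decode('utf-8', 'ignore')
        return None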

@ -0,0 +1,32 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
class pesRecipe(BasicNewsRecipe):
__author__ = 'bubak'
title = u'Neviditelný pes'
publisher = u''
description = u'Neviditelný pes'
oldest_article = 1
max_articles_per_feed = 20
use_embedded_content = False
remove_empty_feeds = True
feeds = [
(u'Neviditelný pes', u'http://neviditelnypes.lidovky.cz/export/rss.asp?c=pes_neviditelny')
]
#encoding = 'iso-8859-2'
language = 'cs'
cover_url = 'http://g.zpravy.cz/o/pes/logo_pes.jpg'
remove_javascript = True
no_stylesheets = True
extra_css = """
"""
remove_tags = []
remove_tags_before = dict(name='div', attrs={'id':'art-full'})
remove_tags_after = dict(name='div', attrs={'id':'authors'})

@ -63,11 +63,11 @@ class NewYorker(BasicNewsRecipe):
return url.strip()
def get_cover_url(self):
cover_url = None
soup = self.index_to_soup('http://www.newyorker.com/magazine/toc/')
cover_item = soup.find('img',attrs={'id':'inThisIssuePhoto'})
cover_url = "http://www.newyorker.com/images/covers/1925/1925_02_21_p233.jpg"
soup = self.index_to_soup('http://www.newyorker.com/magazine?intcid=magazine')
cover_item = soup.find('div',attrs={'id':'media-count-1'})
if cover_item:
cover_url = 'http://www.newyorker.com' + cover_item['src'].strip()
cover_url = 'http://www.newyorker.com' + cover_item.div.img['src'].strip()
return cover_url
def preprocess_html(self, soup):

@ -13,7 +13,7 @@ import datetime
class Newsweek(BasicNewsRecipe):
# how many issues to go back, 0 means get the most current one
BACK_ISSUES = 1
BACK_ISSUES = 2
EDITION = '0'
DATE = None

recipes/novinky.cz.recipe Normal file
@ -0,0 +1,50 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
class novinkyRecipe(BasicNewsRecipe):
__author__ = 'bubak'
title = u'novinky.cz'
publisher = u'seznam.cz'
description = 'novinky.cz'
oldest_article = 1
max_articles_per_feed = 20
feeds = [
(u'Domácí', u'http://www.novinky.cz/rss2/domaci/'),
(u'Praha', u'http://www.novinky.cz/rss2/vase-zpravy/praha/'),
(u'Ekonomika', u'http://www.novinky.cz/rss2/ekonomika/'),
(u'Finance', u'http://www.novinky.cz/rss2/finance/'),
]
#encoding = 'utf-8'
language = 'cs'
cover_url = 'http://www.novinky.cz/static/images/logo.gif'
remove_javascript = True
no_stylesheets = True
remove_tags = [dict(name='div', attrs={'id':['pictureInnerBox']}),
dict(name='div', attrs={'id':['discussionEntry']}),
dict(name='span', attrs={'id':['mynews-hits', 'mynews-author']}),
dict(name='div', attrs={'class':['related']}),
dict(name='div', attrs={'id':['multimediaInfo']})]
remove_tags_before = dict(name='div',attrs={'class':['articleHeader']})
remove_tags_after = dict(name='div',attrs={'class':'related'})
keep_only_tags = []
    # This source serves identical articles under different links that
    # are redirected to a common url. The encoding() hook below is the
    # only API method I've found that sees the real URL.
visited_urls = {}
def encoding(self, source):
url = source.newurl
if url in self.visited_urls:
self.log.debug('Ignoring duplicate: ' + url)
return None
else:
self.visited_urls[url] = True
self.log.debug('Accepting: ' + url)
return source.decode('utf-8', 'replace')
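
When a feed does expose the final URL directly, the usual place to de-duplicate is get_article_url (the tyden.cz recipe further below uses exactly this pattern); a minimal sketch:

from calibre.web.feeds.news import BasicNewsRecipe

class DedupSketch(BasicNewsRecipe):
    title = 'dedup example'
    visited_urls = {}

    def get_article_url(self, article):
        url = BasicNewsRecipe.get_article_url(self, article)
        if url in self.visited_urls:
            return None  # a None URL makes calibre drop the article
        self.visited_urls[url] = True
        return url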

@ -0,0 +1,38 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
import re
class plRecipe(BasicNewsRecipe):
__author__ = 'bubak'
title = u'Parlamentn\u00ed Listy'
publisher = u''
description = ''
oldest_article = 1
max_articles_per_feed = 20
feeds = [
(u'Parlamentní listy.cz', u'http://www.parlamentnilisty.cz/export/rss.aspx')
]
#encoding = 'iso-8859-2'
language = 'cs'
cover_url = 'http://www.parlamentnilisty.cz/design/listy-logo2.png'
remove_javascript = True
no_stylesheets = True
use_embedded_content = False
remove_attributes = []
remove_tags = [dict(name='div', attrs={'class':['articledetailboxin','crumbs', 'relatedarticles articledetailbox']}),
dict(name='div', attrs={'class':['socialshare-1 noprint', 'socialshare-2 noprint']}),
dict(name='div', attrs={'id':'widget'}),
dict(name='div', attrs={'class':'article-discussion-box noprint'})]
preprocess_regexps = [(re.compile(r'<(span|strong)[^>]*>\s*Ptejte se politik.*', re.DOTALL|re.IGNORECASE), lambda match: '</body>')]
keep_only_tags = [dict(name='div', attrs={'class':['article-detail']})]

@ -0,0 +1,40 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
class cpsRecipe(BasicNewsRecipe):
__author__ = 'bubak'
title = u'Piratská strana'
publisher = u''
description = ''
oldest_article = 3
max_articles_per_feed = 20
use_embedded_content = False
remove_empty_feeds = True
feeds = [
(u'Články', u'http://www.pirati.cz/rss.xml')
]
#encoding = 'iso-8859-2'
language = 'cs'
cover_url = 'http://www.pirati.cz/sites/all/themes/addari-cps/images/headbg.jpg'
remove_javascript = True
no_stylesheets = True
extra_css = """
"""
remove_attributes = []
keep_only_tags = [dict(name='div', attrs={'id':'postarea'})]
remove_tags = [dict(name='div', attrs={'class':['breadcrumb', 'submitted', 'links-readmore']}),
dict(name='div', attrs={'id':['comments']})]
remove_tags_before = dict(name='font', attrs={'size':'+3'})
remove_tags_after = [dict(name='iframe')]
conversion_options = {'linearize_tables' : True}

@ -0,0 +1,34 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
class nfpkRecipe(BasicNewsRecipe):
__author__ = 'bubak'
title = u'Piratské noviny'
publisher = u''
description = 'nfpk.cz'
oldest_article = 2
max_articles_per_feed = 20
use_embedded_content = False
remove_empty_feeds = True
feeds = [
(u'Aktuality', u'http://www.piratskenoviny.cz/run/rss.php')
]
#encoding = 'iso-8859-2'
language = 'cs'
cover_url = 'http://www.piratskenoviny.cz/imgs/piratske-noviny.gif'
remove_javascript = True
no_stylesheets = True
extra_css = """
"""
remove_attributes = []
remove_tags_before = dict(name='font', attrs={'size':'+3'})
remove_tags_after = [dict(name='iframe')]
conversion_options = {'linearize_tables' : True}

@ -4,7 +4,7 @@ class AdvancedUserRecipe1348063712(BasicNewsRecipe):
title = u'Portfolio.hu - English Edition'
__author__ = 'laca'
oldest_article = 7
language = 'en_HUN'
language = 'en_HU'
masthead_url = 'http://www.portfolio.hu/img/sit/angolfejlec2010.jpg'
use_embedded_content = False
auto_cleanup = True

recipes/pravo.recipe Normal file
@ -0,0 +1,64 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
class pravo(BasicNewsRecipe):
__author__ = 'bubak'
title = 'Právo'
language = 'cs'
remove_tags_before = dict(name='div', attrs={'class':'rubrika-ostat'})
remove_tags_after = dict(name='td', attrs={'class':'rubrika'})
remove_tags = [dict(name='td', attrs={'width':'273'})
,dict(name='td', attrs={'class':'rubrika'})
,dict(name='div', attrs={'class':'rubrika-ostat'})
]
extra_css = '.nadpis {font-weight: bold; font-size: 130%;} .medium {text-align: justify;}'
cover_url = 'http://pravo.novinky.cz/images/horni_6_logo.gif'
cover_margins = (0, 100, '#ffffff')
conversion_options = {'linearize_tables' : True}
no_stylesheets = True
# our variables
seen_titles = set([])
# only yesterday's articles are online
parent_url = 'http://pravo.novinky.cz/minule/'
feeds = [
('Hlavní stránka', 'http://pravo.novinky.cz/minule/index.php'),
('Zpravodajství', 'http://pravo.novinky.cz/minule/zpravodajstvi.php'),
('Komentáře', 'http://pravo.novinky.cz/minule/komentare.php'),
('Praha a střední Čechy', 'http://pravo.novinky.cz/minule/praha_stredni_cechy.php')
]
def parse_index(self):
articles = []
for feed in self.feeds:
articles.append(self.parse_page(feed))
return articles
def parse_page(self, (feed_title, url)):
articles = []
soup = self.index_to_soup(url)
        titles = soup.findAll('a', attrs={'class':'nadpis'})
        if not titles:
            raise ValueError('Could not find any articles on page ' + url)
        for article in titles:
            title = article.string
            if title in self.seen_titles:
                continue
            self.seen_titles.add(title)
            url = article['href']
            if not url.startswith('http'):
                url = self.parent_url + url
            self.log('\tFound article:', title, 'at', url)
            articles.append({'title':title, 'url':url, 'description':'',
                'date':''})
return (feed_title, articles)
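
Note that parse_page above takes its (feed_title, url) pair via tuple parameter unpacking in the signature, a Python 2-only feature; an equivalent, portable spelling unpacks inside the body:

    def parse_page(self, feed):
        # unpack explicitly instead of in the signature
        feed_title, url = feed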

@ -0,0 +1,40 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'teepel <teepel44@gmail.com>'
'''
http://prawica.net
'''
from calibre.web.feeds.news import BasicNewsRecipe
class prawica_recipe(BasicNewsRecipe):
title = u'prawica.net'
__author__ = 'teepel <teepel44@gmail.com>'
language = 'pl'
description ='Wiadomości ze strony prawica.net'
INDEX='http://prawica.net/'
remove_empty_feeds= True
oldest_article = 1
max_articles_per_feed = 100
remove_javascript=True
no_stylesheets=True
feeds = [(u'all', u'http://prawica.net/all/feed')]
keep_only_tags =[]
    #this line should pull in the article title, but it does not work yet
keep_only_tags.append(dict(name = 'h1', attrs = {'class' : 'print-title'}))
keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'content'}))
remove_tags =[]
remove_tags.append(dict(name = 'div', attrs = {'class' : 'field field-type-viewfield field-field-autor2'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'field field-type-viewfield field-field-publikacje-autora'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'rate-widget-2 rate-widget clear-block rate-average rate-widget-fivestar rate-daa7512627f21dcf15e0af47e5279f0e rate-processed'}))
remove_tags_after =[(dict(name = 'div', attrs = {'class' : 'field-label-inline-first'}))]
def print_version(self, url):
return url.replace('http://prawica.net/', 'http://prawica.net/print/')

@ -0,0 +1,32 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class RedVoltaireRecipe(BasicNewsRecipe):
title = u'Red Voltaire'
__author__ = 'atordo'
description = u'Red de prensa no alineada, especializada en el an\u00e1lisis de las relaciones internacionales'
oldest_article = 7
max_articles_per_feed = 30
auto_cleanup = False
no_stylesheets = True
language = 'es'
use_embedded_content = False
remove_javascript = True
cover_url = u'http://www.voltairenet.org/squelettes/elements/images/logo-voltairenet-org.png'
masthead_url = u'http://www.voltairenet.org/squelettes/elements/images/logo-voltairenet-org.png'
preprocess_regexps = [
(re.compile(r'<title>(?P<titulo>.+)</title>.+<span class="updated" title=".+"><time', re.IGNORECASE|re.DOTALL)
,lambda match:'</title></head><body><h1>'+match.group('titulo')+'</h1><time')
,(re.compile(r'<time datetime=.+pubdate>. (?P<fecha>.+)</time>.+<!------------------- COLONNE TEXTE ------------------->', re.IGNORECASE|re.DOTALL)
,lambda match:'<small>'+match.group('fecha')+'</small>')
,(re.compile(r'<aside>.+', re.IGNORECASE|re.DOTALL)
,lambda match:'</body></html>')
]
extra_css = '''
img{margin-bottom:0.4em; display:block; margin-left:auto; margin-right:auto}
'''
feeds = [u'http://www.voltairenet.org/spip.php?page=backend&id_secteur=1110&lang=es']
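
The regular expressions above use calibre's preprocess_regexps hook: a list of (compiled pattern, replacement callable) pairs applied to the raw HTML before it is parsed. A sketch of that contract with an illustrative pattern (not taken from this recipe):

import re
from calibre.web.feeds.news import BasicNewsRecipe

class RegexpSketch(BasicNewsRecipe):
    title = 'preprocess_regexps example'
    preprocess_regexps = [
        # truncate the document from the first <aside> onwards
        (re.compile(r'<aside>.*', re.DOTALL | re.IGNORECASE),
         lambda match: '</body></html>'),
    ]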

recipes/respekt.recipe Normal file
@ -0,0 +1,37 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
import re
class respektRecipe(BasicNewsRecipe):
__author__ = 'bubak'
title = u'Respekt'
publisher = u'Respekt'
description = 'Respekt'
oldest_article = 1
max_articles_per_feed = 20
feeds = [
(u'Všechny články', u'http://respekt.ihned.cz/index.php?p=R00000_rss')
,(u'Blogy', u'http://blog.respekt.ihned.cz/?p=Rb00VR_rss')
#,(u'Respekt DJ', u'http://respekt.ihned.cz/index.php?p=R00RDJ_rss')
]
encoding = 'cp1250'
language = 'cs'
cover_url = 'http://respekt.ihned.cz/img/R/respekt_logo.png'
remove_javascript = True
no_stylesheets = True
remove_tags = [dict(name='div', attrs={'class':['d-tools', 'actions']})]
remove_tags_before = dict(name='div',attrs={'id':['detail']})
remove_tags_after = dict(name='div',attrs={'class':'d-tools'})
preprocess_regexps = [(re.compile(r'<div class="paid-zone".*', re.DOTALL|re.IGNORECASE), lambda match: 'Za zbytek článku je nutno platit. </body>'),
(re.compile(r'.*<div class="mm-ow">', re.DOTALL|re.IGNORECASE), lambda match: '<body>'),
(re.compile(r'<div class="col3">.*', re.DOTALL|re.IGNORECASE), lambda match: '</body>')]
keep_only_tags = []

@ -0,0 +1,28 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'MrStefan <mrstefaan@gmail.com>'
'''
www.rushisaband.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class rushisaband(BasicNewsRecipe):
title = u'Rushisaband'
__author__ = 'MrStefan <mrstefaan@gmail.com>'
language = 'en_GB'
description =u'A blog devoted to the band RUSH and its members, Neil Peart, Geddy Lee and Alex Lifeson'
remove_empty_feeds= True
oldest_article = 7
max_articles_per_feed = 100
remove_javascript=True
no_stylesheets=True
keep_only_tags =[]
keep_only_tags.append(dict(name = 'h4'))
keep_only_tags.append(dict(name = 'h5'))
keep_only_tags.append(dict(name = 'p'))
feeds = [(u'Rush is a Band', u'http://feeds2.feedburner.com/rushisaband/blog')]

recipes/rybinski.recipe Normal file
@ -0,0 +1,29 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2012, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
rybinski.eu
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Rybinski(BasicNewsRecipe):
title = u'Rybinski.eu - economy of the XXI century'
description = u'Blog ekonomiczny dra hab. Krzysztofa Rybi\u0144skiego'
language = 'pl'
__author__ = u'Tomasz D\u0142ugosz'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
feeds = [(u'wpisy', u'http://www.rybinski.eu/?feed=rss2&lang=pl')]
keep_only_tags = [dict(name='div', attrs={'class':'post'})]
remove_tags = [
dict(name = 'div', attrs = {'class' : 'post-meta-1'}),
dict(name = 'div', attrs = {'class' : 'post-meta-2'}),
dict(name = 'div', attrs = {'class' : 'post-comments'})
]

@ -0,0 +1,41 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'teepel <teepel44@gmail.com>'
'''
http://www.rynekinfrastruktury.pl
'''
from calibre.web.feeds.news import BasicNewsRecipe
class prawica_recipe(BasicNewsRecipe):
title = u'Rynek Infrastruktury'
__author__ = 'teepel <teepel44@gmail.com>'
language = 'pl'
description =u'Portal "Rynek Infrastruktury" to źródło informacji o kluczowych elementach polskiej gospodarki: drogach, kolei, lotniskach, portach, telekomunikacji, energetyce, prawie i polityce, wzmocnione eksperckimi komentarzami kluczowych analityków.'
remove_empty_feeds= True
oldest_article = 1
max_articles_per_feed = 100
remove_javascript=True
no_stylesheets=True
feeds = [
(u'Drogi', u'http://www.rynekinfrastruktury.pl/rss/41'),
(u'Lotniska', u'http://www.rynekinfrastruktury.pl/rss/42'),
(u'Kolej', u'http://www.rynekinfrastruktury.pl/rss/37'),
(u'Energetyka', u'http://www.rynekinfrastruktury.pl/rss/30'),
(u'Telekomunikacja', u'http://www.rynekinfrastruktury.pl/rss/31'),
(u'Porty', u'http://www.rynekinfrastruktury.pl/rss/32'),
(u'Prawo i polityka', u'http://www.rynekinfrastruktury.pl/rss/47'),
(u'Komentarze', u'http://www.rynekinfrastruktury.pl/rss/38'),
]
keep_only_tags =[]
keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'articleContent'}))
remove_tags =[]
remove_tags.append(dict(name = 'span', attrs = {'class' : 'date'}))
def print_version(self, url):
return url.replace('http://www.rynekinfrastruktury.pl/artykul/', 'http://www.rynekinfrastruktury.pl/artykul/drukuj/')

@ -0,0 +1,40 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'teepel <teepel44@gmail.com>'
'''
rynek-kolejowy.pl
'''
from calibre.web.feeds.news import BasicNewsRecipe
class rynek_kolejowy(BasicNewsRecipe):
title = u'Rynek Kolejowy'
__author__ = 'teepel <teepel44@gmail.com>'
language = 'pl'
description =u'Rynek Kolejowy - kalendarium wydarzeń branży kolejowej, konferencje, sympozja, targi kolejowe, krajowe i zagraniczne.'
masthead_url='http://p.wnp.pl/images/i/partners/rynek_kolejowy.gif'
remove_empty_feeds= True
oldest_article = 1
max_articles_per_feed = 100
remove_javascript=True
no_stylesheets=True
keep_only_tags =[]
keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'mainContent'}))
remove_tags =[]
remove_tags.append(dict(name = 'div', attrs = {'class' : 'right no-print'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'font-size'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'no-print'}))
extra_css = '''.wiadomosc_title{ font-size: 1.4em; font-weight: bold; }'''
feeds = [(u'Wiadomości', u'http://www.rynek-kolejowy.pl/rss/rss.php')]
def print_version(self, url):
segment = url.split('/')
urlPart = segment[3]
return 'http://www.rynek-kolejowy.pl/drukuj.php?id=' + urlPart

@ -0,0 +1,26 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'teepel <teepel44@gmail.com>'
'''
samcik.blox.pl
'''
from calibre.web.feeds.news import BasicNewsRecipe
class samcik(BasicNewsRecipe):
title = u'Maciej Samcik Blog'
__author__ = 'teepel <teepel44@gmail.com>'
language = 'pl'
description =u'Blog Macieja Samcika, długoletniego dziennikarza ekonomicznego Gazety Wyborczej . O finansach małych i dużych. Mnóstwo ciekawostek na temat pieniędzy.'
oldest_article = 7
max_articles_per_feed = 100
remove_javascript=True
no_stylesheets=True
simultaneous_downloads = 3
remove_tags =[]
remove_tags.append(dict(name = 'table', attrs = {'border' : '0'}))
feeds = [(u'Wpisy', u'http://samcik.blox.pl/rss2')]

recipes/satkurier.recipe Normal file
@ -0,0 +1,47 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
from calibre.web.feeds.news import BasicNewsRecipe
class SATKurier(BasicNewsRecipe):
title = u'SATKurier.pl'
__author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
language = 'pl'
description = u'Największy i najstarszy serwis poświęcony\
telewizji cyfrowej, przygotowywany przez wydawcę\
miesięcznika SAT Kurier. Bieżące wydarzenia\
z rynku mediów i nowych technologii.'
oldest_article = 7
masthead_url = 'http://satkurier.pl/img/header_sk_logo.gif'
max_articles_per_feed = 100
simultaneous_downloads = 5
remove_javascript = True
no_stylesheets = True
keep_only_tags = []
keep_only_tags.append(dict(name='div', attrs={'id': ['single_news', 'content']}))
remove_tags = []
remove_tags.append(dict(attrs={'id': ['news_info', 'comments']}))
remove_tags.append(dict(attrs={'href': '#czytaj'}))
remove_tags.append(dict(attrs={'align': 'center'}))
remove_tags.append(dict(attrs={'class': ['date', 'category', 'right mini-add-comment', 'socialLinks', 'commentlist']}))
remove_tags_after = [(dict(id='entry'))]
feeds = [(u'Najnowsze wiadomości', u'http://feeds.feedburner.com/satkurierpl?format=xml'),
(u'Sport w telewizji', u'http://feeds.feedburner.com/satkurier/sport?format=xml'),
(u'Blog', u'http://feeds.feedburner.com/satkurier/blog?format=xml')]
    def preprocess_html(self, soup):
        image = soup.find(attrs={'id': 'news_mini_photo'})
        if image:
            image.extract()
            header = soup.find('h1')
            header.replaceWith(header.prettify() + image.prettify())
        for alink in soup.findAll('a'):
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
        return soup

@ -17,6 +17,7 @@ class Sciencenews(BasicNewsRecipe):
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
auto_cleanup = True
timefmt = ' [%A, %d %B, %Y]'
extra_css = '''
@ -31,14 +32,14 @@ class Sciencenews(BasicNewsRecipe):
.credit{color:#A6A6A6;font-family:helvetica,arial ;font-size: xx-small ;}
'''
keep_only_tags = [ dict(name='div', attrs={'id':'column_action'}) ]
remove_tags_after = dict(name='ul', attrs={'id':'content_functions_bottom'})
remove_tags = [
dict(name='ul', attrs={'id':'content_functions_bottom'})
,dict(name='div', attrs={'id':['content_functions_top','breadcrumb_content']})
,dict(name='img', attrs={'class':'icon'})
,dict(name='div', attrs={'class': 'embiggen'})
]
#keep_only_tags = [ dict(name='div', attrs={'id':'column_action'}) ]
#remove_tags_after = dict(name='ul', attrs={'id':'content_functions_bottom'})
#remove_tags = [
#dict(name='ul', attrs={'id':'content_functions_bottom'})
#,dict(name='div', attrs={'id':['content_functions_top','breadcrumb_content']})
#,dict(name='img', attrs={'class':'icon'})
#,dict(name='div', attrs={'class': 'embiggen'})
#]
feeds = [(u"Science News / News Items", u'http://sciencenews.org/index.php/feed/type/news/name/news.rss/view/feed/name/all.rss')]
@ -53,9 +54,9 @@ class Sciencenews(BasicNewsRecipe):
return cover_url
def preprocess_html(self, soup):
#def preprocess_html(self, soup):
for tag in soup.findAll(name=['span']):
tag.name = 'div'
#for tag in soup.findAll(name=['span']):
#tag.name = 'div'
return soup
#return soup

@ -16,7 +16,7 @@ class AdvancedUserRecipe1303841067(BasicNewsRecipe):
no_stylesheets = True
remove_javascript = True
remove_empty_feeds = True
language = 'de_DE'
language = 'de'
#conversion_options = {'base_font_size': 20}

@ -0,0 +1,67 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
from calibre.web.feeds.news import BasicNewsRecipe
import re
class telepolis(BasicNewsRecipe):
title = u'Telepolis.pl'
__author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
language = 'pl'
description = u'Twój telekomunikacyjny serwis informacyjny.\
Codzienne informacje, testy i artykuły,\
promocje, baza telefonów oraz centrum rozrywki'
oldest_article = 7
masthead_url = 'http://telepolis.pl/i/telepolis-logo2.gif'
max_articles_per_feed = 100
simultaneous_downloads = 5
remove_javascript = True
no_stylesheets = True
use_embedded_content = False
remove_tags = []
remove_tags.append(dict(attrs={'alt': 'TELEPOLIS.pl'}))
preprocess_regexps = [(re.compile(r'<: .*? :>'),
lambda match: ''),
(re.compile(r'<b>Zobacz:</b>.*?</a>', re.DOTALL),
lambda match: ''),
(re.compile(r'<-ankieta.*?>'),
lambda match: ''),
(re.compile(r'\(Q\!\)'),
lambda match: ''),
(re.compile(r'\(plik.*?\)'),
lambda match: ''),
(re.compile(r'<br.*?><br.*?>', re.DOTALL),
lambda match: '')
]
extra_css = '''.tb { font-weight: bold; font-size: 20px;}'''
feeds = [
(u'Wiadomości', u'http://www.telepolis.pl/rss/news.php'),
(u'Artykuły', u'http://www.telepolis.pl/rss/artykuly.php')
]
def print_version(self, url):
if 'news.php' in url:
print_url = url.replace('news.php', 'news_print.php')
else:
print_url = url.replace('artykuly.php', 'art_print.php')
return print_url
def preprocess_html(self, soup):
        for image in soup.findAll('img'):
            src = image.get('src', '')
            if 'm.jpg' in src:
                # swap the thumbnail for the full-size image
                image['src'] = src.replace('m.jpg', '.jpg')
        logo = soup.find('tr')
        if logo is not None:
            logo.extract()
for tag in soup.findAll('tr'):
for strings in ['Wiadomość wydrukowana', 'copyright']:
if strings in self.tag_to_string(tag):
tag.extract()
return self.adeify_images(soup)

@ -30,11 +30,6 @@ class tvn24(BasicNewsRecipe):
feeds = [(u'Najnowsze', u'http://www.tvn24.pl/najnowsze.xml'), ]
#(u'Polska', u'www.tvn24.pl/polska.xml'), (u'\u015awiat', u'http://www.tvn24.pl/swiat.xml'), (u'Sport', u'http://www.tvn24.pl/sport.xml'), (u'Biznes', u'http://www.tvn24.pl/biznes.xml'), (u'Meteo', u'http://www.tvn24.pl/meteo.xml'), (u'Micha\u0142ki', u'http://www.tvn24.pl/michalki.xml'), (u'Kultura', u'http://www.tvn24.pl/kultura.xml')]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup
def preprocess_html(self, soup):
for alink in soup.findAll('a'):
if alink.string is not None:

recipes/tyden.cz.recipe Normal file
@ -0,0 +1,44 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
class tydenRecipe(BasicNewsRecipe):
__author__ = 'bubak'
title = u'Tyden.cz'
publisher = u''
description = ''
oldest_article = 1
max_articles_per_feed = 20
feeds = [
(u'Domácí', u'http://www.tyden.cz/rss/rss.php?rubrika_id=6'),
(u'Politika', u'http://www.tyden.cz/rss/rss.php?rubrika_id=173'),
(u'Kauzy', u'http://www.tyden.cz/rss/rss.php?rubrika_id=340')
]
#encoding = 'iso-8859-2'
language = 'cs'
cover_url = 'http://www.tyden.cz/img/tyden-logo.png'
remove_javascript = True
no_stylesheets = True
remove_attributes = []
remove_tags_before = dict(name='p', attrs={'id':['breadcrumbs']})
remove_tags_after = dict(name='p', attrs={'class':['author']})
visited_urls = {}
def get_article_url(self, article):
url = BasicNewsRecipe.get_article_url(self, article)
if url in self.visited_urls:
self.log.debug('Ignoring duplicate: ' + url)
return None
else:
self.visited_urls[url] = True
self.log.debug('Accepting: ' + url)
return url

@ -4,7 +4,7 @@ class AdvancedUserRecipe1347997197(BasicNewsRecipe):
title = u'XpatLoop.com'
__author__ = 'laca'
oldest_article = 7
language = 'en_HUN'
language = 'en_HU'
auto_cleanup = True
masthead_url = 'http://www.xpatloop.com/images/cms/xs_logo.gif'
use_embedded_content = False

@ -16,7 +16,7 @@ class ZeitDe(BasicNewsRecipe):
category = 'news, Germany'
timefmt = ' [%a, %d %b %Y]'
publication_type = 'newspaper'
language = 'de_DE'
language = 'de'
encoding = 'UTF-8'
oldest_article = 7

Binary files not shown.

@ -11,6 +11,7 @@ let g:syntastic_cpp_include_dirs = [
\'/usr/include/freetype2',
\'/usr/include/fontconfig',
\'src/qtcurve/common', 'src/qtcurve',
\'src/unrar',
\'/usr/include/ImageMagick',
\]
let g:syntastic_c_include_dirs = g:syntastic_cpp_include_dirs

@ -6,7 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, socket, struct, subprocess
import os, socket, struct, subprocess, sys, glob
from distutils.spawn import find_executable
from PyQt4 import pyqtconfig
@ -16,6 +16,7 @@ from setup import isosx, iswindows, islinux
OSX_SDK = '/Developer/SDKs/MacOSX10.5.sdk'
os.environ['MACOSX_DEPLOYMENT_TARGET'] = '10.5'
is64bit = sys.maxsize > 2**32
NMAKE = RC = msvc = MT = win_inc = win_lib = win_ddk = win_ddk_lib_dirs = None
if iswindows:
@ -35,7 +36,7 @@ if iswindows:
MT = os.path.join(os.path.dirname(p), 'bin', 'mt.exe')
MT = os.path.join(SDK, 'bin', 'mt.exe')
os.environ['QMAKESPEC'] = 'win32-msvc'
ICU = r'Q:\icu'
ICU = os.environ.get('ICU_DIR', r'Q:\icu')
QMAKE = '/Volumes/sw/qt/bin/qmake' if isosx else 'qmake'
if find_executable('qmake-qt4'):
@ -121,7 +122,8 @@ if iswindows:
zlib_lib_dirs = [sw_lib_dir]
zlib_libs = ['zlib']
magick_inc_dirs = [os.path.join(prefix, 'build', 'ImageMagick-6.7.6')]
md = glob.glob(os.path.join(prefix, 'build', 'ImageMagick-*'))[-1]
magick_inc_dirs = [md]
magick_lib_dirs = [os.path.join(magick_inc_dirs[0], 'VisualMagick', 'lib')]
magick_libs = ['CORE_RL_wand_', 'CORE_RL_magick_']
podofo_inc = os.path.join(sw_inc_dir, 'podofo')

@ -18,7 +18,7 @@ from setup.build_environment import (chmlib_inc_dirs,
msvc, MT, win_inc, win_lib, win_ddk, magick_inc_dirs, magick_lib_dirs,
magick_libs, chmlib_lib_dirs, sqlite_inc_dirs, icu_inc_dirs,
icu_lib_dirs, win_ddk_lib_dirs, ft_libs, ft_lib_dirs, ft_inc_dirs,
zlib_libs, zlib_lib_dirs, zlib_inc_dirs)
zlib_libs, zlib_lib_dirs, zlib_inc_dirs, is64bit)
MT
isunix = islinux or isosx or isbsd
@ -47,6 +47,13 @@ class Extension(object):
self.ldflags = kwargs.get('ldflags', [])
self.optional = kwargs.get('optional', False)
self.needs_ddk = kwargs.get('needs_ddk', False)
of = kwargs.get('optimize_level', None)
if of is None:
of = '/Ox' if iswindows else '-O3'
else:
flag = '/O%d' if iswindows else '-O%d'
of = flag % of
self.cflags.insert(0, of)
def preflight(self, obj_dir, compiler, linker, builder, cflags, ldflags):
pass
@ -176,6 +183,24 @@ extensions = [
sip_files = ['calibre/gui2/progress_indicator/QProgressIndicator.sip']
),
Extension('unrar',
['unrar/%s.cpp'%(x.partition('.')[0]) for x in '''
rar.o strlist.o strfn.o pathfn.o savepos.o smallfn.o global.o file.o
filefn.o filcreat.o archive.o arcread.o unicode.o system.o
isnt.o crypt.o crc.o rawread.o encname.o resource.o match.o
timefn.o rdwrfn.o consio.o options.o ulinks.o errhnd.o rarvm.o
secpassword.o rijndael.o getbits.o sha1.o extinfo.o extract.o
volume.o list.o find.o unpack.o cmddata.o filestr.o scantree.o
'''.split()] + ['calibre/utils/unrar.cpp'],
inc_dirs=['unrar'],
cflags = [('/' if iswindows else '-') + x for x in (
'DSILENT', 'DRARDLL', 'DUNRAR')] + (
[] if iswindows else ['-D_FILE_OFFSET_BITS=64',
'-D_LARGEFILE_SOURCE']),
optimize_level=2,
libraries=['User32', 'Advapi32', 'kernel32', 'Shell32'] if iswindows else []
),
]
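
The new optimize_level keyword defaults to full optimization (/Ox on MSVC, -O3 elsewhere) and otherwise formats the requested level into the per-compiler flag; a worked example of that logic, mirroring the kwargs handling in Extension.__init__ above:

def opt_flag(optimize_level, iswindows):
    if optimize_level is None:
        return '/Ox' if iswindows else '-O3'  # default: full optimization
    return ('/O%d' if iswindows else '-O%d') % optimize_level

assert opt_flag(None, False) == '-O3'
assert opt_flag(2, True) == '/O2'  # what optimize_level=2 (unrar) yields on windows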
@ -239,7 +264,7 @@ if isunix:
cxx = os.environ.get('CXX', 'g++')
cflags = os.environ.get('OVERRIDE_CFLAGS',
# '-Wall -DNDEBUG -ggdb -fno-strict-aliasing -pipe')
'-O3 -Wall -DNDEBUG -fno-strict-aliasing -pipe')
'-Wall -DNDEBUG -fno-strict-aliasing -pipe')
cflags = shlex.split(cflags) + ['-fPIC']
ldflags = os.environ.get('OVERRIDE_LDFLAGS', '-Wall')
ldflags = shlex.split(ldflags)
@ -274,10 +299,12 @@ if isosx:
if iswindows:
cc = cxx = msvc.cc
cflags = '/c /nologo /Ox /MD /W3 /EHsc /DNDEBUG'.split()
cflags = '/c /nologo /MD /W3 /EHsc /DNDEBUG'.split()
ldflags = '/DLL /nologo /INCREMENTAL:NO /NODEFAULTLIB:libcmt.lib'.split()
#cflags = '/c /nologo /Ox /MD /W3 /EHsc /Zi'.split()
#ldflags = '/DLL /nologo /INCREMENTAL:NO /DEBUG'.split()
if is64bit:
cflags.append('/GS-')
for p in win_inc:
cflags.append('-I'+p)

Some files were not shown because too many files have changed in this diff.