Merge from trunk
42
.bzrignore
@ -39,3 +39,45 @@ recipes/.git
|
||||
recipes/.gitignore
|
||||
recipes/README
|
||||
recipes/katalog_egazeciarz.recipe
|
||||
recipes/tv_axnscifi.recipe
|
||||
recipes/tv_comedycentral.recipe
|
||||
recipes/tv_discoveryscience.recipe
|
||||
recipes/tv_foxlife.recipe
|
||||
recipes/tv_fox.recipe
|
||||
recipes/tv_hbo.recipe
|
||||
recipes/tv_kinopolska.recipe
|
||||
recipes/tv_nationalgeographic.recipe
|
||||
recipes/tv_polsat2.recipe
|
||||
recipes/tv_polsat.recipe
|
||||
recipes/tv_tv4.recipe
|
||||
recipes/tv_tvn7.recipe
|
||||
recipes/tv_tvn.recipe
|
||||
recipes/tv_tvp1.recipe
|
||||
recipes/tv_tvp2.recipe
|
||||
recipes/tv_tvphd.recipe
|
||||
recipes/tv_tvphistoria.recipe
|
||||
recipes/tv_tvpkultura.recipe
|
||||
recipes/tv_tvppolonia.recipe
|
||||
recipes/tv_tvpuls.recipe
|
||||
recipes/tv_viasathistory.recipe
|
||||
recipes/icons/tv_axnscifi.png
|
||||
recipes/icons/tv_comedycentral.png
|
||||
recipes/icons/tv_discoveryscience.png
|
||||
recipes/icons/tv_foxlife.png
|
||||
recipes/icons/tv_fox.png
|
||||
recipes/icons/tv_hbo.png
|
||||
recipes/icons/tv_kinopolska.png
|
||||
recipes/icons/tv_nationalgeographic.png
|
||||
recipes/icons/tv_polsat2.png
|
||||
recipes/icons/tv_polsat.png
|
||||
recipes/icons/tv_tv4.png
|
||||
recipes/icons/tv_tvn7.png
|
||||
recipes/icons/tv_tvn.png
|
||||
recipes/icons/tv_tvp1.png
|
||||
recipes/icons/tv_tvp2.png
|
||||
recipes/icons/tv_tvphd.png
|
||||
recipes/icons/tv_tvphistoria.png
|
||||
recipes/icons/tv_tvpkultura.png
|
||||
recipes/icons/tv_tvppolonia.png
|
||||
recipes/icons/tv_tvpuls.png
|
||||
recipes/icons/tv_viasathistory.png
|
||||
|
138
Changelog.yaml
@ -19,6 +19,144 @@
|
||||
# new recipes:
|
||||
# - title:
|
||||
|
||||
- version: 0.9.7
|
||||
date: 2012-11-23
|
||||
|
||||
new features:
|
||||
- title: "Edit metadata dialog: Show the size of the current book cover in the edit metadata dialog."
|
||||
tickets: [1079781]
|
||||
|
||||
- title: "Get Books: Allow easy searching by title and author in addition to any keyword, to prevent large numbers of spurious matches."
|
||||
|
||||
- title: "An option to automatically convert any added book to the current output format, found under Preferences->Adding books"
|
||||
|
||||
- title: "E-book viewer: Allow viewing tables in a separate popup window by right clicking on the table and selecting 'View table'. Useful for reference books that have lots of large tables."
|
||||
tickets: [1080710]
|
||||
|
||||
- title: "Catalogs: Add the current library name as an available field when generating catalogs in csv/xml format."
|
||||
tickets: [1078422]
|
||||
|
||||
- title: "Enable colored text in the output from the command line tools on windows"
|
||||
|
||||
- title: "E-book viewer: Add an option to hide the help message when entering full screen mode"
|
||||
|
||||
- title: "E-book viewer: Add an option to always start the viewer in full screen mode"
|
||||
|
||||
- title: "E-book viewer: Add many more controls to the context menu, particularly useful in full screen mode"
|
||||
|
||||
- title: "E-book viewer: Allow easy searching of the selected word or phrase in google via the context menu"
|
||||
|
||||
- title: "Add a new type of FileType plugin, postimport, that runs after a book has been added to the database."
|
||||
|
||||
- title: "Get Books: Remove Gandalf store, add Publio store. Update the Legimi store plugin for website changes"
|
||||
|
||||
bug fixes:
|
||||
- title: "Conversion: Correctly handle values of left and right for the deprecated align attribute of images, mapping them to the CSS float property instead of to text-align."
|
||||
tickets: [1081094]
|
||||
|
||||
- title: "MOBI Output: When generating joint MOBI6/KF8 files do not set incorrect display CSS values for tables in the KF8 part"
|
||||
|
||||
- title: "Connect to iTunes: Ignore AAC audio files."
|
||||
tickets: [1081096]
|
||||
|
||||
- title: "E-book viewer: Fix restoring from fullscreen not respecting maximized window state"
|
||||
|
||||
- title: "Fix rows in the device books view sometimes being too high"
|
||||
|
||||
- title: "Catalogs: Fixed a problem occurring when merging comments with a custom field whose type is a list."
|
||||
|
||||
- title: "Linux binary: Use exec in the wrapper shell scripts that are used to set env vars and launch calibre utilities."
|
||||
tickets: [1077884]
|
||||
|
||||
- title: "E-book viewer: Fix blank pages after every page when viewing some comic files in paged mode"
|
||||
|
||||
- title: "E-book viewer: When printing, respect the specified page range."
|
||||
tickets: [1074220]
|
||||
|
||||
- title: "Font subsetting: Parse the GSUB table for glyph substitution rules and do not remove any glyphs that could act as substitutes. Keep zero length glyphs like the glyphs for non printable characters when subsetting TrueType outlines."
|
||||
|
||||
- title: "Smarten punctuation: Fix self closing script tags causing smarten punctuation to fail"
|
||||
|
||||
|
||||
improved recipes:
|
||||
- Arguments and facts
|
||||
- Business Standard
|
||||
- The New Yorker
|
||||
|
||||
new recipes:
|
||||
- title: Various Czech and Hungarian news sources
|
||||
author: bubak
|
||||
|
||||
- title: Various Polish recipes
|
||||
author: Artur Stachecki
|
||||
|
||||
- title: Buchreport
|
||||
author: a.peter
|
||||
|
||||
- title: Red Voltaire
|
||||
author: atordo
|
||||
|
||||
- title: Autosport
|
||||
author: Mr Stefan
|
||||
|
||||
- title: House News
|
||||
author: Eddie Lau
|
||||
|
||||
- version: 0.9.6
|
||||
date: 2012-11-10
|
||||
|
||||
new features:
|
||||
- title: "Experimental support for subsetting fonts"
|
||||
description: "Subsetting a font means reducing the font to contain only the glyphs for the text actually present in the book. This can easily halve the size of the font. calibre can now do this for all embedded fonts during a conversion. Turn it on via the 'Subset all embedded fonts' option under the Look & Feel section of the conversion dialog. calibre can subset both TrueType and OpenType fonts. Note that this code is very new and likely has bugs, so please check the output if you turn on subsetting. The conversion log will have info about the subsetting operations."
|
||||
type: major
|
||||
|
||||
- title: "EPUB Input: Try to workaround EPUBs that have missing or damaged ZIP central directories. calibre should now be able to read/convert such an EPUB file, provided it does not suffer from further corruption."
|
||||
|
||||
- title: "Allow using identifiers in save to disk templates."
|
||||
tickets: [1074623]
|
||||
|
||||
- title: "calibredb: Add an option to not notify the GUI"
|
||||
|
||||
- title: "Catalogs: Fix long tags causing catalog generation to fail on windows. Add the ability to cross-reference authors, i.e. to relist the authors for a book with multiple authors separately."
|
||||
tickets: [1074931]
|
||||
|
||||
- title: "Edit metadata dialog: Add a clear tags button to remove all tags with a single click"
|
||||
|
||||
- title: "Add search to the font family chooser dialog"
|
||||
|
||||
bug fixes:
|
||||
- title: "Windows: Fix a long standing bug in the device eject code that for some reason only manifested in 0.9.5."
|
||||
tickets: [1075782]
|
||||
|
||||
- title: "Get Books: Fix Amazon stores, Google Books store and libri.de"
|
||||
|
||||
- title: "Kobo driver: More fixes for on device book matching, and list books as being on device even if the Kobo has not yet indexed them. Also some performance improvements."
|
||||
tickets: [1069617]
|
||||
|
||||
- title: "EPUB Output: Remove duplicate id and name attributes to eliminate pointless noise from the various epub check utilities"
|
||||
|
||||
- title: "Ask for confirmation before removing plugins"
|
||||
|
||||
- title: "Fix bulk convert queueing dialog becoming very long if any of the books have a very long title."
|
||||
tickets: [1076191]
|
||||
|
||||
- title: "Fix deleting custom column tags like data from the Tag browser not updating the last modified timestamp for affected books"
|
||||
tickets: [1075476]
|
||||
|
||||
- title: "When updating a previously broken plugin, do not show an error message because the previous version of the plugin cannot be loaded"
|
||||
|
||||
- title: "Fix regression that broke the Template Editor"
|
||||
|
||||
improved recipes:
|
||||
- Various updated Polish recipes
|
||||
- London Review of Books
|
||||
- Yemen Times
|
||||
|
||||
new recipes:
|
||||
- title: "Various Polish news sources"
|
||||
author: Artur Stachecki
|
||||
|
||||
|
||||
- version: 0.9.5
|
||||
date: 2012-11-02
|
||||
|
||||
|
@ -649,20 +649,24 @@ If it still wont launch, start a command prompt (press the windows key and R; th
|
||||
|
||||
Post any output you see in a help message on the `Forum <http://www.mobileread.com/forums/forumdisplay.php?f=166>`_.
|
||||
|
||||
|app| freezes when I click on anything?
|
||||
|app| freezes/crashes occasionally?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
There are three possible things I know of, that can cause this:
|
||||
|
||||
* You recently connected an external monitor or TV to your computer. In this case, whenever |app| opens a new window like the edit metadata window or the conversion dialog, it appears on the second monitor where you dont notice it and so you think |app| has frozen. Disconnect your second monitor and restart calibre.
|
||||
* You recently connected an external monitor or TV to your computer. In
|
||||
this case, whenever |app| opens a new window like the edit metadata
|
||||
window or the conversion dialog, it appears on the second monitor where
|
||||
you don't notice it and so you think |app| has frozen. Disconnect your
|
||||
second monitor and restart calibre.
|
||||
|
||||
* You are using a Wacom branded mouse. There is an incompatibility between Wacom mice and the graphics toolkit |app| uses. Try using a non-Wacom mouse.
|
||||
* You are using a Wacom branded mouse. There is an incompatibility between
|
||||
Wacom mice and the graphics toolkit |app| uses. Try using a non-Wacom
|
||||
mouse.
|
||||
|
||||
* If you use RoboForm, it is known to cause |app| to crash. Add |app| to
|
||||
the blacklist of programs inside RoboForm to fix this.
|
||||
|
||||
* Sometimes if some software has installed lots of new files in your fonts folder, |app| can crash until it finishes indexing them. Just start |app|, then leave it alone for about 20 minutes, without clicking on anything. After that you should be able to use |app| as normal.
|
||||
|
||||
the blacklist of programs inside RoboForm to fix this. Or uninstall
|
||||
RoboForm.
|
||||
|
||||
|app| is not starting on OS X?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
@ -721,8 +725,8 @@ You can switch |app| to using a backed up library folder by simply clicking the
|
||||
|
||||
If you want to back up the |app| configuration/plugins, you have to back up the config directory. You can find this config directory via :guilabel:`Preferences->Miscellaneous`. Note that restoring configuration directories is not officially supported, but should work in most cases. Just copy the contents of the backup directory into the current configuration directory to restore.
|
||||
|
||||
How do I use purchased EPUB books with |app|?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
How do I use purchased EPUB books with |app| (or what do I do with .acsm files)?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
Most purchased EPUB books have `DRM <http://drmfree.calibre-ebook.com/about#drm>`_. This prevents |app| from opening them. You can still use |app| to store and transfer them to your ebook reader. First, you must authorize your reader on a windows machine with Adobe Digital Editions. Once this is done, EPUB books transferred with |app| will work fine on your reader. When you purchase an epub book from a website, you will get an ".acsm" file. This file should be opened with Adobe Digital Editions, which will then download the actual ".epub" ebook. The ebook file will be stored in the folder "My Digital Editions", from where you can add it to |app|.
|
||||
|
||||
I am getting a "Permission Denied" error?
|
||||
|
@ -1,5 +1,5 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2010 - 2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.aif.ru
|
||||
'''
|
||||
@ -19,12 +19,19 @@ class AIF_ru(BasicNewsRecipe):
|
||||
encoding = 'cp1251'
|
||||
language = 'ru'
|
||||
publication_type = 'magazine'
|
||||
extra_css = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: Verdana,Arial,Helvetica,sans1,sans-serif} '
|
||||
keep_only_tags = [dict(name='div',attrs={'id':'inner'})]
|
||||
masthead_url = 'http://static3.aif.ru/glossy/index/i/logo.png'
|
||||
extra_css = """
|
||||
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
|
||||
body{font-family: Verdana,Arial,Helvetica,sans1,sans-serif}
|
||||
img{display: block}
|
||||
"""
|
||||
keep_only_tags = [
|
||||
dict(name='div',attrs={'class':['content-header', 'zoom']})
|
||||
,dict(name='div',attrs={'id':'article-text'})
|
||||
]
|
||||
remove_tags = [
|
||||
dict(name=['iframe','object','link','base','input','img'])
|
||||
,dict(name='div',attrs={'class':'photo'})
|
||||
,dict(name='p',attrs={'class':'resizefont'})
|
||||
dict(name=['iframe','object','link','base','input','meta'])
|
||||
,dict(name='div',attrs={'class':'in-topic'})
|
||||
]
|
||||
|
||||
feeds = [(u'News', u'http://www.aif.ru/rss/all.php')]
|
||||
|
69
recipes/aktualne.cz.recipe
Normal file
@ -0,0 +1,69 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import unicode_literals
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
class aktualneRecipe(BasicNewsRecipe):
    # Recipe for the Czech news site aktualne.cz; feeds are served from
    # aktualne.centrum.cz and blog.aktualne.centrum.cz.

    # --- identity ---------------------------------------------------------
    __author__ = 'bubak'
    title = u'aktualne.cz'
    publisher = u'Centrum holdings'
    description = 'aktuálně.cz'
    language = 'cs'
    cover_url = 'http://img.aktualne.centrum.cz/design/akt4/o/l/logo-akt-ciste.png'

    # --- download limits --------------------------------------------------
    oldest_article = 1
    max_articles_per_feed = 20

    feeds = [
        (u'Domácí', u'http://aktualne.centrum.cz/feeds/rss/domaci/?photo=0'),
        (u'Zprávy', u'http://aktualne.centrum.cz/feeds/rss/zpravy/?photo=0'),
        (u'Praha', u'http://aktualne.centrum.cz/feeds/rss/domaci/regiony/praha/?photo=0'),
        (u'Ekonomika', u'http://aktualne.centrum.cz/feeds/rss/ekonomika/?photo=0'),
        (u'Finance', u'http://aktualne.centrum.cz/feeds/rss/finance/?photo=0'),
        (u'Blogy a názory', u'http://blog.aktualne.centrum.cz/export-all.php'),
    ]

    # --- page clean-up ----------------------------------------------------
    remove_javascript = True
    no_stylesheets = True
    remove_attributes = []
    keep_only_tags = []
    remove_tags_before = {'name': 'h1', 'attrs': {'class': ['titulek-clanku']}}
    filter_regexps = [r'img.aktualne.centrum.cz']
    remove_tags = [
        {'name': 'div', 'attrs': {'id': ['social-bookmark']}},
        {'name': 'div', 'attrs': {'class': ['box1', 'svazane-tagy']}},
        {'name': 'div', 'attrs': {'class': 'itemcomment id0'}},
        {'name': 'div', 'attrs': {'class': 'hlavicka'}},
        {'name': 'div', 'attrs': {'class': 'hlavni-menu'}},
        {'name': 'div', 'attrs': {'class': 'top-standard-brand-obal'}},
        {'name': 'div', 'attrs': {'class': 'breadcrumb'}},
        {'name': 'div', 'attrs': {'id': 'start-standard'}},
        {'name': 'div', 'attrs': {'id': 'forum'}},
        {'name': 'span', 'attrs': {'class': 'akce'}},
        {'name': 'span', 'attrs': {'class': 'odrazka vetsi'}},
        {'name': 'div', 'attrs': {'class': 'boxP'}},
        {'name': 'div', 'attrs': {'class': 'box2'}},
    ]
    # Each pattern matches from a trailing page section to the end of the
    # document and replaces the whole match with a closing </body>.
    preprocess_regexps = [
        (
            re.compile(
                r'<div class="(contenttitle"|socialni-site|wiki|facebook-promo|facebook-like-button"|meta-akce).*',
                re.DOTALL | re.IGNORECASE,
            ),
            lambda match: '</body>',
        ),
        (
            re.compile(
                r'<div class="[^"]*poutak-clanek-trojka".*',
                re.DOTALL | re.IGNORECASE,
            ),
            lambda match: '</body>',
        ),
    ]

    # URLs already handed out by get_article_url; used only for membership
    # tests. Defined on the class, so it is shared by every instance.
    visited_urls = {}

    def get_article_url(self, article):
        # Return the article URL the first time it is seen and None for any
        # later occurrence of the same URL.
        url = BasicNewsRecipe.get_article_url(self, article)
        if url not in self.visited_urls:
            self.visited_urls[url] = True
            self.log.debug('Accepting: ' + url)
            return url
        self.log.debug('Ignoring duplicate: ' + url)
        return None

    def encoding(self, source):
        # Decode ``source`` as UTF-8 when its ``newurl`` contains
        # 'blog.aktualne', and as ISO-8859-2 otherwise; undecodable bytes
        # are replaced rather than raising.
        from_blog = 'blog.aktualne' in source.newurl
        enc = 'utf-8' if from_blog else 'iso-8859-2'
        self.log.debug('Called encoding ' + enc + " " + str(source.newurl))
        return source.decode(enc, 'replace')
|
||||
|
48
recipes/antyweb.recipe
Normal file
@ -0,0 +1,48 @@
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AntywebRecipe(BasicNewsRecipe):
    # Recipe for Antyweb, a Polish blog about the internet and new technology.

    # --- identity ---------------------------------------------------------
    __license__ = 'GPL v3'
    __author__ = u'Artur Stachecki <artur.stachecki@gmail.com>'
    version = 1
    title = u'Antyweb'
    category = u'News'
    description = u'Blog o internecie i nowych technologiach'
    language = 'pl'
    encoding = 'utf-8'
    cover_url = ''

    # --- download behaviour -----------------------------------------------
    oldest_article = 1
    max_articles_per_feed = 100
    simultaneous_downloads = 3
    remove_empty_feeds = True
    use_embedded_content = False

    # --- page clean-up ----------------------------------------------------
    auto_cleanup = False
    no_stylesheets = True
    remove_javascript = True

    keep_only_tags = [
        {'name': 'h1', 'attrs': {'class': 'mm-article-title'}},
        {'name': 'div', 'attrs': {'class': 'mm-article-content'}},
    ]

    remove_tags = [
        {'name': 'h2', 'attrs': {'class': 'widgettitle'}},
        {'name': 'img', 'attrs': {'class': 'alignleft'}},
        # NOTE(review): this value looks like inline CSS but is matched
        # against the ``class`` attribute -- confirm whether ``style`` was meant.
        {'name': 'div', 'attrs': {'class': 'float: right;margin-left:1em;margin-bottom: 0.5em;padding-bottom: 3px; width: 72px;'}},
        {'name': 'img', 'attrs': {'src': 'http://antyweb.pl/wp-content/uploads/2011/09/HOSTERSI_testy_pasek600x30.gif'}},
        {'name': 'div', 'attrs': {'class': 'podwpisowe'}},
    ]

    extra_css = '''
    body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
    '''

    feeds = [
        (u'Artykuly', u'feed://feeds.feedburner.com/Antyweb?format=xml'),
    ]

    def preprocess_html(self, soup):
        # Replace every <a> whose ``string`` is not None with that string,
        # i.e. turn simple links into plain text; other anchors are left
        # untouched.
        for anchor in soup.findAll('a'):
            label = anchor.string
            if label is None:
                continue
            anchor.replaceWith(label)
        return soup
|
27
recipes/app_funds.recipe
Normal file
@ -0,0 +1,27 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'teepel <teepel44@gmail.com>'
|
||||
|
||||
'''
|
||||
appfunds.blogspot.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class app_funds(BasicNewsRecipe):
    # Recipe for the APP Funds blog (appfunds.blogspot.com); relies on
    # auto_cleanup instead of hand-written tag filters.

    # --- identity ---------------------------------------------------------
    __author__ = 'teepel <teepel44@gmail.com>'
    title = u'APP Funds'
    description = 'Blog inwestora dla inwestorów i oszczędzających'
    language = 'pl'
    INDEX = 'http://appfunds.blogspot.com'

    # --- download behaviour -----------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 100
    simultaneous_downloads = 5
    remove_empty_feeds = True

    # --- page clean-up ----------------------------------------------------
    auto_cleanup = True
    no_stylesheets = True
    remove_javascript = True

    feeds = [
        (u'blog', u'http://feeds.feedburner.com/blogspot/etVI'),
    ]
|
||||
|
30
recipes/autosport.recipe
Normal file
@ -0,0 +1,30 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'MrStefan <mrstefaan@gmail.com>'
|
||||
|
||||
'''
|
||||
www.autosport.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class autosport(BasicNewsRecipe):
    # Recipe for the autosport.com "all news" RSS feed.

    # --- identity ---------------------------------------------------------
    __author__ = 'MrStefan <mrstefaan@gmail.com>'
    title = u'Autosport'
    description = u'Daily Formula 1 and motorsport news from the leading weekly motor racing magazine. The authority on Formula 1, F1, MotoGP, GP2, Champ Car, Le Mans...'
    language = 'en_GB'
    masthead_url = 'http://cdn.images.autosport.com/asdotcom.gif'

    # --- download behaviour -----------------------------------------------
    oldest_article = 1
    max_articles_per_feed = 100
    remove_empty_feeds = True

    # --- page clean-up ----------------------------------------------------
    no_stylesheets = True
    remove_javascript = True

    # Headline, author cell, date cell and every paragraph are kept.
    keep_only_tags = [
        {'name': 'h1', 'attrs': {'class': 'news_headline'}},
        {'name': 'td', 'attrs': {'class': 'news_article_author'}},
        {'name': 'td', 'attrs': {'class': 'news_article_date'}},
        {'name': 'p'},
    ]

    feeds = [
        (u'ALL NEWS', u'http://www.autosport.com/rss/allnews.xml'),
    ]
|
50
recipes/bankier_pl.recipe
Normal file
@ -0,0 +1,50 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'teepel <teepel44@gmail.com>'
|
||||
|
||||
'''
|
||||
bankier.pl
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class bankier(BasicNewsRecipe):
    # Recipe for the Polish finance portal bankier.pl; articles are fetched
    # through the site's print view (see print_version).

    # --- identity ---------------------------------------------------------
    __author__ = 'teepel <teepel44@gmail.com>'
    title = u'Bankier.pl'
    description = 'Polski portal finansowy. Informacje o: gospodarka, inwestowanie, finanse osobiste, prowadzenie firmy, kursy walut, notowania akcji, fundusze.'
    language = 'pl'
    masthead_url = 'http://www.bankier.pl/gfx/hd-mid-02.gif'
    INDEX = 'http://bankier.pl/'

    # --- download behaviour -----------------------------------------------
    oldest_article = 1
    max_articles_per_feed = 100
    simultaneous_downloads = 5
    remove_empty_feeds = True

    # --- page clean-up ----------------------------------------------------
    no_stylesheets = True
    remove_javascript = True

    keep_only_tags = [
        {'name': 'div', 'attrs': {'align': 'left'}},
    ]

    remove_tags = [
        {'name': 'table', 'attrs': {'cellspacing': '2'}},
        {'name': 'div', 'attrs': {'align': 'center'}},
        {'name': 'img', 'attrs': {'src': '/gfx/hd-mid-02.gif'}},
        # Disabled filters kept from the original recipe:
        # {'name': 'a', 'attrs': {'target': '_blank'}},
        # {'name': 'br', 'attrs': {'clear': 'all'}},
    ]

    feeds = [
        (u'Wiadomości dnia', u'http://feeds.feedburner.com/bankier-wiadomosci-dnia'),
        (u'Finanse osobiste', u'http://feeds.feedburner.com/bankier-finanse-osobiste'),
        (u'Firma', u'http://feeds.feedburner.com/bankier-firma'),
        (u'Giełda', u'http://feeds.feedburner.com/bankier-gielda'),
        (u'Rynek walutowy', u'http://feeds.feedburner.com/bankier-rynek-walutowy'),
        (u'Komunikaty ze spółek', u'http://feeds.feedburner.com/bankier-espi'),
    ]

    def print_version(self, url):
        # Build the print-view URL from the article id, taken as the text
        # after the last '-' in the third dot-separated piece of ``url``.
        # NOTE(review): presumably URLs look like
        # http://www.bankier.pl/...-<id>.html -- confirm against the feeds;
        # a URL with fewer than three dot-separated pieces raises IndexError.
        article_id = url.split('.')[2].split('-')[-1]
        return 'http://www.bankier.pl/wiadomosci/print.html?article_id=' + article_id
|
||||
|
55
recipes/blesk.recipe
Normal file
@ -0,0 +1,55 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import unicode_literals
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
class bleskRecipe(BasicNewsRecipe):
    # Recipe for the Czech tabloid blesk.cz.

    # --- identity ---------------------------------------------------------
    __author__ = 'bubak'
    title = u'Blesk'
    publisher = u''
    description = 'blesk.cz'
    language = 'cs'
    cover_url = 'http://img.blesk.cz/images/blesk/blesk-logo.png'
    # encoding = 'iso-8859-2'

    # --- download behaviour -----------------------------------------------
    oldest_article = 1
    max_articles_per_feed = 20
    use_embedded_content = False

    feeds = [
        (u'Zprávy', u'http://www.blesk.cz/rss/7'),
        (u'Blesk', u'http://www.blesk.cz/rss/1'),
        (u'Sex a tabu', u'http://www.blesk.cz/rss/2'),
        (u'Celebrity', u'http://www.blesk.cz/rss/5'),
        (u'Cestování', u'http://www.blesk.cz/rss/12'),
    ]

    # --- page clean-up ----------------------------------------------------
    remove_javascript = True
    no_stylesheets = True
    extra_css = """
    """
    remove_attributes = []
    keep_only_tags = [{'name': 'div', 'attrs': {'class': 'articleContent'}}]
    remove_tags_before = {'name': 'div', 'attrs': {'id': ['boxContent']}}
    remove_tags_after = {'name': 'div', 'attrs': {'class': ['artAuthors']}}
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['link_clanek']}},
        {'name': 'div', 'attrs': {'id': ['partHeader']}},
        {'name': 'div', 'attrs': {'id': ['top_bottom_box', 'lista_top']}},
    ]
    # Everything from a "textovytip"/"related" box to the end of the
    # document is replaced with a closing </body>.
    preprocess_regexps = [
        (
            re.compile(r'<div class="(textovytip|related)".*', re.DOTALL | re.IGNORECASE),
            lambda match: '</body>',
        ),
    ]

    # URLs already handed out by get_article_url; used only for membership
    # tests. Defined on the class, so it is shared by every instance.
    visited_urls = {}

    def get_article_url(self, article):
        # Return the article URL the first time it is seen and None for any
        # later occurrence of the same URL.
        url = BasicNewsRecipe.get_article_url(self, article)
        if url not in self.visited_urls:
            self.visited_urls[url] = True
            self.log.debug('Accepting: ' + url)
            return url
        self.log.debug('Ignoring duplicate: ' + url)
        return None
|
||||
|
||||
|
||||
|
||||
|
28
recipes/blognexto.recipe
Normal file
@ -0,0 +1,28 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class blognexto(BasicNewsRecipe):
    # Recipe for BLOG.NEXTO.pl, a Polish blog about e-publications.

    # --- identity ---------------------------------------------------------
    __author__ = 'MrStefan <mrstefaan@gmail.com>'
    title = 'BLOG.NEXTO.pl'
    description = 'o e-publikacjach prawie wszystko'
    language = 'pl'
    masthead_url = 'http://blog.nexto.pl/wp-content/uploads/2012/04/logo-blog-nexto.pl_.jpg'

    # --- download behaviour -----------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True

    # --- page clean-up ----------------------------------------------------
    no_stylesheets = True
    remove_javascript = True

    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'content'}},
    ]

    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'comment-cloud'}},
        {'name': 'p', 'attrs': {'class': 'post-date1'}},
        {'name': 'div', 'attrs': {'class': 'fb-like'}},
        {'name': 'div', 'attrs': {'class': 'tags'}},
        {'name': 'div', 'attrs': {'class': 'postnavi'}},
        {'name': 'div', 'attrs': {'class': 'commments-box'}},
        {'name': 'div', 'attrs': {'id': 'respond'}},
    ]

    feeds = [
        ('Artykuly', 'http://feeds.feedburner.com/blognexto'),
    ]
|
140
recipes/brewiarz.recipe
Normal file
@ -0,0 +1,140 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import datetime, re
|
||||
|
||||
|
||||
class brewiarz(BasicNewsRecipe):
    # Recipe for brewiarz.pl (Liturgy of the Hours). parse_index builds one
    # feed per day -- or one per "sector" sub-page when the day's index page
    # yields no articles -- for ``next_days`` days starting today.

    title = u'Brewiarz'
    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
    description = u'Serwis poświęcony Liturgii Godzin (brewiarzowi) - formie codziennej modlitwy Kościoła katolickiego.'
    masthead_url = 'http://brewiarz.pl/images/logo2.gif'
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    publication_type = 'newspaper'
    # Number of consecutive days, starting with today, to download.
    next_days = 1

    def parse_index(self):
        # Return a list of (feed title, article list) tuples.
        #
        # The site URL embeds the month as a lower-case Roman numeral and
        # the year as two digits: http://brewiarz.pl/<roman>_<yy>/<dd><mm>/
        dec2rom_dict = {"01": "i", "02": "ii", "03": "iii", "04": "iv",
                        "05": "v", "06": "vi", "07": "vii", "08": "viii",
                        "09": "ix", "10": "x", "11": "xi", "12": "xii"}

        # Indexed by datetime.weekday() (Monday == 0). The original looked
        # the name up via strftime("%A"), whose output depends on the
        # process locale and raised KeyError for non-English locales.
        weekday_names_pl = ["Poniedziałek", "Wtorek", "Środa", "Czwartek",
                            "Piątek", "Sobota", "Niedziela"]

        now = datetime.datetime.now()

        feeds = []
        for i in range(self.next_days):
            url_date = now + datetime.timedelta(days=i)
            month = url_date.strftime("%m")
            day = url_date.strftime("%d")
            year = url_date.strftime("%Y")[2:]
            weekday_pl = weekday_names_pl[url_date.weekday()]

            day_url = "http://brewiarz.pl/" + dec2rom_dict[month] + "_" + year + "/" + day + month + "/"
            day_title = weekday_pl + " " + day + "." + month + "." + year
            index_url = day_url + "index.php3"

            articles = self.parse_pages(index_url)
            if articles:
                feeds.append((day_title, articles))
                continue

            # No articles on the day's index page: try each sector sub-page
            # linked from it and emit one feed per sub-page.
            for subpage in self.get_sectors(index_url):
                title = day_title + " - " + subpage.string
                url = day_url + subpage['href']
                print(url)
                articles = self.parse_pages(url)
                if articles:
                    feeds.append((title, articles))
        return feeds

    def get_sectors(self, url):
        # Return the <a> tags found inside the table with width="490" on
        # the page at ``url`` (an empty list when there is no such table).
        sectors = []
        soup = self.index_to_soup(url)
        sectors_table = soup.find(name='table', attrs={'width': '490'})
        if sectors_table is None:
            return sectors
        for sector_link in sectors_table.findAll(name='a'):
            parent_div = sector_link.findParent(name='div')
            # ``.text`` already yields the parent's text; the original
            # dereferenced ``.text`` a second time on that value, which
            # fails as soon as the value is a string.
            link_parent_text = parent_div.text if parent_div is not None else None
            if link_parent_text:
                # NOTE(review): whether ``text`` is assignable depends on
                # the BeautifulSoup version in use, and parse_index labels
                # feeds from ``.string`` -- confirm the intended effect.
                sector_link.text = link_parent_text
            sectors.append(sector_link)
        return sectors

    def _make_article(self, title, base_url, href):
        # Build the article dict for ``href``, rewritten to the site's
        # print view by appending the print query string after 'php3'.
        link_url_print = re.sub('php3', 'php3?kr=_druk&wr=lg&', href)
        return {'title': title, 'url': base_url + link_url_print,
                'description': '', 'date': ''}

    def parse_pages(self, url):
        # Return the list of article dicts linked from the "Teksty LG" box
        # of the page at ``url``, or None when the page has no such box.
        soup = self.index_to_soup(url)
        www = soup.find(attrs={'class': 'www'})
        if not www:
            return None
        box_title = www.find(text='Teksty LG')
        if box_title is None:
            return None
        article_box_parent = box_title.findParent('ul')
        if article_box_parent is None:
            return None
        article_box_sibling = article_box_parent.findNextSibling('ul')
        if article_box_sibling is None:
            return None

        # Directory of the current page, used to resolve the relative
        # hrefs. The original used url[:-10], which is only the directory
        # when the URL ends in "index.php3" (10 characters) and is wrong
        # for sector sub-pages with other file names.
        base_url = url.rsplit('/', 1)[0] + '/'

        current_articles = []
        for li in article_box_sibling.findAll('li'):
            link = li.find(name='a')
            if link is None:
                continue
            ol = link.findNextSibling(name='ol')
            if ol:
                # Entry with a nested list: one article per nested link,
                # titled "<entry> - <nested link>".
                for sublink in ol.findAll(name='a'):
                    link_title = self.tag_to_string(link) + " - " + self.tag_to_string(sublink)
                    current_articles.append(
                        self._make_article(link_title, base_url, sublink['href']))
            elif not link.findParent(name='ol'):
                # Plain entry; links that live inside an <ol> were already
                # emitted through their parent entry above.
                current_articles.append(
                    self._make_article(self.tag_to_string(link), base_url, link['href']))
        return current_articles

    @staticmethod
    def _extract_parent_div(node):
        # Remove the nearest enclosing <div> of ``node`` from the tree;
        # do nothing when ``node`` or that <div> is missing.
        if node is None:
            return
        parent_div = node.findParent('div')
        if parent_div is not None:
            parent_div.extract()

    def preprocess_html(self, soup):
        # Strip the site footer, header, vestment-colour line and the first
        # <b>, unwrap all non-whitelisted tags, then drop the 2nd-4th <div>.
        # Missing elements are skipped instead of raising AttributeError /
        # IndexError as the original did.
        self._extract_parent_div(soup.find(name='a', attrs={'href': 'http://brewiarz.pl'}))
        self._extract_parent_div(soup.find(text='http://brewiarz.pl'))
        self._extract_parent_div(soup.find(text='Kolor szat:'))

        color = soup.find('b')
        if color is not None:
            color.extract()

        cleaned = self.strip_tags(soup)

        for div in cleaned.findAll(name='div')[1:4]:
            div.extract()

        return cleaned

    def strip_tags(self, soup_dirty):
        # Unwrap every tag whose name is not in VALID_TAGS: its children
        # are moved into its parent at the tag's position and the tag
        # itself is removed. Returns the same (mutated) soup.
        VALID_TAGS = ['p', 'div', 'br', 'b', 'a', 'title', 'head', 'html', 'body']

        for tag in soup_dirty.findAll(True):
            if tag.name in VALID_TAGS:
                continue
            # Locate the tag's index among its parent's children.
            for i, x in enumerate(tag.parent.contents):
                if x == tag:
                    break
            else:
                # Single-argument call form: valid as both a Python 2
                # statement and a Python 3 function call, and consistent
                # with the print(url) call in parse_index.
                print("Can't find %s in %s" % (tag, tag.parent))
                continue
            # Insert in reverse so the children keep their original order.
            for r in reversed(tag.contents):
                tag.parent.insert(i, r)
            tag.extract()

        return soup_dirty
|
45
recipes/buchreport.recipe
Normal file
@ -0,0 +1,45 @@
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
'''Calibre recipe to convert the RSS feeds of the Buchreport to an ebook.'''
|
||||
|
||||
class Buchreport(BasicNewsRecipe):
    """Recipe that converts the Buchreport RSS feed into an e-book."""

    __author__ = 'a.peter'
    __copyright__ = 'a.peter'
    __license__ = 'GPL v3'
    description = 'Buchreport'
    version = 4
    title = u'Buchreport'
    timefmt = ' [%d.%m.%Y]'
    encoding = 'cp1252'
    language = 'de'

    # Adjacent literals are joined by the parser into one stylesheet string.
    extra_css = (
        'body { margin-left: 0.00em; margin-right: 0.00em; } '
        'article, articledate, articledescription { text-align: left; } '
        'h1 { text-align: left; font-size: 140%; font-weight: bold; } '
        'h2 { text-align: left; font-size: 100%; font-weight: bold; font-style: italic; } '
        # "regular" is not a CSS font-weight keyword; "normal" is the valid one.
        'h3 { text-align: left; font-size: 100%; font-weight: normal; font-style: italic; } '
        'h4, h5, h6 { text-align: left; font-size: 100%; font-weight: bold; }'
    )

    oldest_article = 7.0
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False
    publication_type = 'newspaper'

    # The same inline-styled <div> is used both as the "after" marker and as
    # an element to remove.
    remove_tags_before = dict(name='h2')
    remove_tags_after = [
        dict(name='div', attrs={'style':["padding-top:10px;clear:both"]})
    ]
    remove_tags = [
        dict(name='div', attrs={'style':["padding-top:10px;clear:both"]}),
        dict(name='iframe'),
        dict(name='img')
    ]

    feeds = [
        (u'Buchreport', u'http://www.buchreport.de/index.php?id=5&type=100')
    ]

    def get_masthead_url(self):
        """Return the fixed URL of the Buchreport logo."""
        return 'http://www.buchreport.de/fileadmin/template/img/buchreport_logo.jpg'
|
@ -1,5 +1,5 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2009-2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.business-standard.com
|
||||
'''
|
||||
@ -14,10 +14,12 @@ class BusinessStandard(BasicNewsRecipe):
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
auto_cleanup = False
|
||||
encoding = 'cp1252'
|
||||
publisher = 'Business Standard Limited'
|
||||
category = 'news, business, money, india, world'
|
||||
language = 'en_IN'
|
||||
masthead_url = 'http://feeds.business-standard.com/images/logo_08.jpg'
|
||||
|
||||
conversion_options = {
|
||||
'comments' : description
|
||||
@ -26,7 +28,7 @@ class BusinessStandard(BasicNewsRecipe):
|
||||
,'publisher' : publisher
|
||||
,'linearize_tables': True
|
||||
}
|
||||
keep_only_tags=[dict(attrs={'class':'TableClas'})]
|
||||
#keep_only_tags=[dict(name='td', attrs={'class':'TableClas'})]
|
||||
remove_tags = [
|
||||
dict(name=['object','link','script','iframe','base','meta'])
|
||||
,dict(attrs={'class':'rightDiv2'})
|
||||
@ -45,3 +47,8 @@ class BusinessStandard(BasicNewsRecipe):
|
||||
,(u'Management & Mktg' , u'http://feeds.business-standard.com/rss/7_0.xml' )
|
||||
,(u'Opinion' , u'http://feeds.business-standard.com/rss/5_0.xml' )
|
||||
]
|
||||
|
||||
def print_version(self, url):
    """Build the print-page URL from the last two path segments of ``url``.

    The final segment becomes the ``tp`` query value and the segment before
    it becomes ``autono``.
    """
    remainder, _, tp = url.rpartition('/')
    autono = remainder.rpartition('/')[2]
    prefix = 'http://www.business-standard.com/india/printpage.php?autono='
    return prefix + autono + '&tp=' + tp
|
||||
|
68
recipes/ceska_pozice.recipe
Normal file
@ -0,0 +1,68 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import unicode_literals
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class ceskaPoziceRecipe(BasicNewsRecipe):
    """Recipe for ceskapozice.cz: de-duplicates articles across feeds and
    stitches multi-page articles into a single document."""

    __author__ = 'bubak'
    title = u'Česká pozice'
    description = 'Česká pozice'
    oldest_article = 2
    max_articles_per_feed = 20

    feeds = [
        (u'Všechny články', u'http://www.ceskapozice.cz/rss.xml'),
        (u'Domov', u'http://www.ceskapozice.cz/taxonomy/term/16/feed'),
        (u'Chrono', u'http://www.ceskapozice.cz/chrono/feed'),
        (u'Evropa', u'http://www.ceskapozice.cz/taxonomy/term/17/feed')
    ]

    language = 'cs'
    cover_url = 'http://www.ceskapozice.cz/sites/default/files/cpozice_logo.png'
    remove_javascript = True
    no_stylesheets = True
    # Prefix for the relative "next page" links handled in append_page().
    domain = u'http://www.ceskapozice.cz'
    use_embedded_content = False

    remove_tags = [dict(name='div', attrs={'class':['block-ad', 'region region-content-ad']}),
                   dict(name='ul', attrs={'class':'links'}),
                   dict(name='div', attrs={'id':['comments', 'back-to-top']}),
                   dict(name='div', attrs={'class':['next-page', 'region region-content-ad']}),
                   dict(name='cite')]

    keep_only_tags = [dict(name='div', attrs={'id':'content'})]

    # URLs already accepted; used only for membership tests.
    visited_urls = {}

    def get_article_url(self, article):
        """Return the article URL, or None if it is missing or already seen.

        The same article can be listed in several of the feeds above; only
        its first occurrence is accepted.
        """
        url = BasicNewsRecipe.get_article_url(self, article)
        if url is None:
            # Nothing to de-duplicate, and concatenating None into the log
            # message below would raise TypeError.
            return None
        if url in self.visited_urls:
            self.log.debug('Ignoring duplicate: ' + url)
            return None
        self.visited_urls[url] = True
        self.log.debug('Accepting: ' + url)
        return url

    def preprocess_html(self, soup):
        """Append any follow-up pages to the body, then return the soup."""
        self.append_page(soup, soup.body, 3)
        return soup

    def append_page(self, soup, appendtag, position):
        """Recursively pull in the pages linked from the bottom pager.

        :param soup: page whose 'paging-bottom' pager is inspected
        :param appendtag: tag that receives the next page's 'main-body' div
        :param position: index in ``appendtag`` where that div is inserted

        The pager is removed from ``soup`` when present. A pager without a
        usable next link, or a next page without a 'main-body' div, is
        skipped instead of raising AttributeError.
        """
        pager = soup.find('div', attrs={'class':'paging-bottom'})
        if not pager:
            return

        nextbutton = pager.find('li', attrs={'class':'pager-next'})
        link = nextbutton.a if nextbutton else None
        if link is not None:
            soup2 = self.index_to_soup(self.domain + link['href'])
            texttag = soup2.find('div', attrs={'class':'main-body'})
            if texttag is not None:
                for it in texttag.findAll('div', attrs={'class':'region region-content-ad'}):
                    it.extract()
                for it in texttag.findAll('cite'):
                    it.extract()
                # Pages after this one are appended to the end of its text.
                self.append_page(soup2, texttag, len(texttag.contents))
                texttag.extract()
                appendtag.insert(position, texttag)

        pager.extract()
|
||||
|
30
recipes/ceske_noviny.recipe
Normal file
@ -0,0 +1,30 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import unicode_literals
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class ceskenovinyRecipe(BasicNewsRecipe):
    """Recipe for ceskenoviny.cz (only the 'Domácí' feed is enabled)."""

    # --- identification ---
    __author__ = 'bubak'
    title = u'České Noviny'
    description = 'ceskenoviny.cz'
    language = 'cs'
    cover_url = 'http://i4.cn.cz/grafika/cn_logo-print.gif'

    # --- feed selection ---
    oldest_article = 1
    max_articles_per_feed = 20
    feeds = [
        (u'Domácí', u'http://www.ceskenoviny.cz/sluzby/rss/domov.php')
        # Further feeds of the site, disabled:
        #,(u'Hlavní události', u'http://www.ceskenoviny.cz/sluzby/rss/index.php')
        #,(u'Přehled zpráv', u'http://www.ceskenoviny.cz/sluzby/rss/zpravy.php')
        #,(u'Ze světa', u'http://www.ceskenoviny.cz/sluzby/rss/svet.php')
        #,(u'Kultura', u'http://www.ceskenoviny.cz/sluzby/rss/kultura.php')
        #,(u'IT', u'http://www.ceskenoviny.cz/sluzby/rss/pocitace.php')
    ]

    # --- page clean-up ---
    no_stylesheets = True
    remove_javascript = True
    remove_attributes = []
    filter_regexps = [r'img.aktualne.centrum.cz']
    keep_only_tags = [dict(name='div', attrs={'id':'clnk'})]
|
26
recipes/cesky_rozhlas_6.recipe
Normal file
@ -0,0 +1,26 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import unicode_literals
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class cro6Recipe(BasicNewsRecipe):
    """Recipe for the Český rozhlas 6 export feed."""

    # --- identification ---
    __author__ = 'bubak'
    title = u'Český rozhlas 6'
    description = 'Český rozhlas 6'
    language = 'cs'
    cover_url = 'http://www.rozhlas.cz/img/e5/logo/cro6.png'

    # --- feed selection ---
    oldest_article = 1
    max_articles_per_feed = 20
    feeds = [
        (u'Český rozhlas 6', u'http://www.rozhlas.cz/export/cro6/')
    ]

    # --- page clean-up ---
    no_stylesheets = True
    remove_javascript = True
    remove_attributes = []
    keep_only_tags = [dict(name='div', attrs={'id':'article'})]
    remove_tags = [
        dict(name='div', attrs={'class':['audio-play-all', 'poradHeaders', 'actions']}),
        dict(name='p', attrs={'class':['para-last']}),
    ]
|
39
recipes/demagog.cz.recipe
Normal file
@ -0,0 +1,39 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import unicode_literals
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
class demagogRecipe(BasicNewsRecipe):
    """Recipe for the demagog.cz 'Aktuality' feed."""

    __author__ = 'bubak'
    title = u'Demagog.cz'
    publisher = u''
    description = 'demagog.cz'
    oldest_article = 6
    max_articles_per_feed = 20
    use_embedded_content = False
    remove_empty_feeds = True

    feeds = [
        (u'Aktuality', u'http://demagog.cz/rss')
    ]

    #encoding = 'iso-8859-2'
    language = 'cs'
    cover_url = 'http://demagog.cz/content/images/demagog.cz.png'
    remove_javascript = True
    no_stylesheets = True
    extra_css = """
    .vyrok_suhrn{margin-top:50px; }
    .vyrok{margin-bottom:30px; }
    """

    remove_tags = [dict(name='a', attrs={'class':'vyrok_odovodnenie_tgl'}),
                   dict(name='img', attrs={'class':'vyrok_fotografia'})]
    remove_tags_before = dict(name='h1')
    remove_tags_after = dict(name='div', attrs={'class':'vyrok_text_after'})

    # Put a horizontal rule right after each opening summary <div>.
    # The replacement is a callable, so its return value is used verbatim and
    # no backreference template is expanded: a literal '\1' would be the
    # control character U+0001 and the matched <div> would be lost. The
    # captured tag is therefore re-emitted explicitly.
    preprocess_regexps = [
        (re.compile(r'(<div class="vyrok_suhrn">)', re.DOTALL|re.IGNORECASE),
         lambda match: match.group(1) + '<hr>')
    ]
|
||||
|
||||
|
||||
|
||||
|
36
recipes/denik.cz.recipe
Normal file
@ -0,0 +1,36 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import unicode_literals
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class ceskyDenikRecipe(BasicNewsRecipe):
    """Recipe for denik.cz (national news plus the Prague regional feed)."""

    # --- identification ---
    __author__ = 'bubak'
    title = u'denik.cz'
    publisher = u''
    description = u'Český deník'
    language = 'cs'
    cover_url = 'http://g.denik.cz/images/loga/denik.png'
    #encoding = 'iso-8859-2'

    # --- feed selection ---
    oldest_article = 1
    max_articles_per_feed = 20
    remove_empty_feeds = True
    use_embedded_content = False
    feeds = [
        (u'Z domova', u'http://www.denik.cz/rss/z_domova.html')
        ,(u'Pražský deník - Moje Praha', u'http://prazsky.denik.cz/rss/zpravy_region.html')
        # Further feeds of the site, disabled:
        #,(u'Zahraničí', u'http://www.denik.cz/rss/ze_sveta.html')
        #,(u'Kultura', u'http://www.denik.cz/rss/kultura.html')
    ]

    # --- page clean-up ---
    no_stylesheets = True
    remove_javascript = True
    extra_css = """
    """
    keep_only_tags = [dict(name='div', attrs={'class':'content'})]
    remove_tags = []
    #remove_tags_before = dict(name='h1')
    remove_tags_after = dict(name='p', attrs={'class':'clanek-autor'})
|
||||
|
||||
|
28
recipes/denik_referendum.recipe
Normal file
@ -0,0 +1,28 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import unicode_literals
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class denikReferendumRecipe(BasicNewsRecipe):
    """Recipe for the Deník Referendum FeedBurner feed."""

    # --- identification ---
    __author__ = 'bubak'
    title = u'Den\u00edk Referendum'
    publisher = u''
    description = ''
    language = 'cs'
    #encoding = 'iso-8859-2'

    # --- feed selection ---
    oldest_article = 1
    max_articles_per_feed = 20
    use_embedded_content = False
    feeds = [
        (u'Deník Referendum', u'http://feeds.feedburner.com/DenikReferendum')
    ]

    # --- page clean-up ---
    no_stylesheets = True
    remove_javascript = True
    remove_attributes = []
    keep_only_tags = [dict(name='div', attrs={'id':['content']})]
    remove_tags_after = dict(name='div', attrs={'class':['text']})
    remove_tags = [
        dict(name='div', attrs={'class':['box boxLine', 'box noprint', 'box']}),
        dict(name='h3', attrs={'class':'head alt'}),
    ]
|
@ -6,7 +6,6 @@ class Dobreprogramy_pl(BasicNewsRecipe):
|
||||
__author__ = 'fenuks'
|
||||
__licence__ ='GPL v3'
|
||||
category = 'IT'
|
||||
language = 'pl'
|
||||
masthead_url='http://static.dpcdn.pl/css/Black/Images/header_logo_napis_fullVersion.png'
|
||||
cover_url = 'http://userlogos.org/files/logos/Karmody/dobreprogramy_01.png'
|
||||
description = u'Aktualności i blogi z dobreprogramy.pl'
|
||||
@ -29,4 +28,4 @@ class Dobreprogramy_pl(BasicNewsRecipe):
|
||||
for a in soup('a'):
|
||||
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
||||
a['href']=self.index + a['href']
|
||||
return soup
|
||||
return soup
|
||||
|
@ -7,6 +7,7 @@ class AdvancedUserRecipe1332847053(BasicNewsRecipe):
|
||||
title = u'Editoriali'
|
||||
__author__ = 'faber1971'
|
||||
description = 'Leading articles on Italy by the best Italian editorials'
|
||||
language = 'it'
|
||||
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 100
|
||||
|
35
recipes/f1_ultra.recipe
Normal file
@ -0,0 +1,35 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
class f1ultra(BasicNewsRecipe):
    """Recipe for the f1ultra.pl Formula 1 news feed."""

    title = u'Formuła 1 - F1 ultra'
    __license__ = 'GPL v3'
    __author__ = 'MrStefan <mrstefaan@gmail.com>, Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
    description = u'Formuła 1, Robert Kubica, F3, GP2 oraz inne serie wyścigowe.'
    masthead_url = 'http://www.f1ultra.pl/templates/f1ultra/images/logo.gif'
    remove_empty_feeds = True
    oldest_article = 1
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True

    keep_only_tags = [dict(name='div', attrs={'id': 'main'})]
    remove_tags_after = [dict(attrs={'style': 'margin-top:5px;margin-bottom:5px;display: inline;'})]
    remove_tags = [
        dict(attrs={'class': ['buttonheading', 'avPlayerContainer', 'createdate']}),
        dict(attrs={'title': ['PDF', 'Drukuj', 'Email']}),
        dict(name='form', attrs={'method': 'post'}),
        dict(name='hr', attrs={'size': '2'}),
    ]

    # Strip layout attributes and opening <table> tags from the raw HTML.
    preprocess_regexps = [
        (re.compile(r'align="left"'), lambda match: ''),
        (re.compile(r'align="right"'), lambda match: ''),
        # Match the whole quoted value. The previous pattern (width=\"*\")
        # matched only 'width="' and left the value and closing quote behind
        # in the tag.
        (re.compile(r'width="[^"]*"'), lambda match: ''),
        (re.compile(r'\<table .*?\>'), lambda match: ''),
    ]

    extra_css = '''.contentheading { font-size: 1.4em; font-weight: bold; }
    img { display: block; clear: both;}
    '''
    remove_attributes = ['width','height','position','float','padding-left','padding-right','padding','text-align']

    feeds = [(u'F1 Ultra', u'http://www.f1ultra.pl/index.php?option=com_rd_rss&id=1&Itemid=245')]
|
@ -8,6 +8,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1349086293(BasicNewsRecipe):
|
||||
title = u'Foreign Policy'
|
||||
language = 'en'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'International News'
|
||||
publisher = 'Washingtonpost.Newsweek Interactive, LLC'
|
||||
|
@ -1,39 +1,88 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2010, Tomasz Dlugosz <tomek3d@gmail.com>'
|
||||
__copyright__ = u'2010-2012, Tomasz Dlugosz <tomek3d@gmail.com>'
|
||||
'''
|
||||
fronda.pl
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
from datetime import timedelta, date
|
||||
|
||||
class Fronda(BasicNewsRecipe):
|
||||
title = u'Fronda.pl'
|
||||
publisher = u'Fronda.pl'
|
||||
description = u'Portal po\u015bwi\u0119cony - Infformacje'
|
||||
description = u'Portal po\u015bwi\u0119cony - Informacje'
|
||||
language = 'pl'
|
||||
__author__ = u'Tomasz D\u0142ugosz'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
use_embedded_content = False
|
||||
no_stylesheets = True
|
||||
|
||||
feeds = [(u'Infformacje', u'http://fronda.pl/news/feed')]
|
||||
extra_css = '''
|
||||
h1 {font-size:150%}
|
||||
.body {text-align:left;}
|
||||
div.headline {font-weight:bold}
|
||||
'''
|
||||
|
||||
keep_only_tags = [dict(name='h2', attrs={'class':'news_title'}),
|
||||
dict(name='div', attrs={'class':'naglowek_tresc'}),
|
||||
dict(name='div', attrs={'id':'czytaj'}) ]
|
||||
earliest_date = date.today() - timedelta(days=oldest_article)
|
||||
|
||||
remove_tags = [dict(name='a', attrs={'class':'print'})]
|
||||
def date_cut(self, datestr):
    """Tell whether an article is older than ``self.earliest_date``.

    ``datestr`` has the form ``5.11.2012, 12:07``; only the
    ``day.month.year`` part before the comma is parsed.

    :return: True if the parsed date precedes ``self.earliest_date``
    """
    fields = datestr.split(',')[0].split('.')
    published = date(int(fields[2]), int(fields[1]), int(fields[0]))
    return published < self.earliest_date
|
||||
|
||||
preprocess_regexps = [
|
||||
(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
|
||||
[ (r'<p><a href="http://fronda.pl/sklepy">.*</a></p>', lambda match: ''),
|
||||
(r'<p><a href="http://fronda.pl/pasaz">.*</a></p>', lambda match: ''),
|
||||
(r'<h3><strong>W.* lektury.*</a></p></div>', lambda match: '</div>'),
|
||||
(r'<h3>Zobacz t.*?</div>', lambda match: '</div>'),
|
||||
(r'<p[^>]*> </p>', lambda match: ''),
|
||||
(r'<p><span style=".*?"><br /></span></p> ', lambda match: ''),
|
||||
(r'<a style=\'float:right;margin-top:3px;\' href="http://www.facebook.com/share.php?.*?</a>', lambda match: '')]
|
||||
]
|
||||
def parse_index(self):
    """Build one feed per fronda.pl category page.

    Each category page is scanned for <li> items that carry an <h2>
    headline; items whose date (the text of the item's <b>) is rejected by
    ``self.date_cut`` are left out.

    :return: list of ``(category name, [article dict, ...])`` tuples, where
        each article dict has the keys 'title', 'url' and 'date'
    """
    categories = [
        ('ekonomia,4.html', 'Ekonomia'),
        ('filozofia,15.html', 'Filozofia'),
        ('historia,6.html', 'Historia'),
        ('kosciol,8.html', 'Kościół'),
        ('kultura,5.html', 'Kultura'),
        ('media,10.html', 'Media'),
        ('nauka,9.html', 'Nauka'),
        ('polityka,11.html', 'Polityka'),
        ('polska,12.html', 'Polska'),
        ('prolife,3.html', 'Prolife'),
        ('religia,7.html', 'Religia'),
        ('rodzina,13.html', 'Rodzina'),
        ('swiat,14.html', 'Świat'),
        ('wydarzenie,16.html', 'Wydarzenie')
    ]
    result = []

    for page, label in categories:
        listing = self.index_to_soup('http://www.fronda.pl/c/'+ page)
        entries = []
        for item in listing.findAll('li'):
            headline = item.find('h2')
            if not headline:
                continue
            published = self.tag_to_string(item.find('b'))
            if self.date_cut(published):
                continue
            anchor = headline.find('a')
            entries.append({
                'title': self.tag_to_string(anchor),
                'url': 'http://www.fronda.pl' + anchor['href'],
                'date': published,
            })
        result.append((label, entries))

    return result
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class':'yui-g'})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':['related-articles',
|
||||
'button right',
|
||||
'pagination']}),
|
||||
dict(name='h3', attrs={'class':'block-header article comments'}),
|
||||
dict(name='ul', attrs={'class':'comment-list'}),
|
||||
dict(name='ul', attrs={'class':'category'}),
|
||||
dict(name='p', attrs={'id':'comments-disclaimer'}),
|
||||
dict(name='div', attrs={'id':'comment-form'})
|
||||
]
|
||||
|
102
recipes/gazeta_pl_krakow.recipe
Normal file
@ -0,0 +1,102 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = 'teepel <teepel44@gmail.com> based on GW from fenuks'
|
||||
|
||||
'''
|
||||
krakow.gazeta.pl
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class gw_krakow(BasicNewsRecipe):
    """Recipe for the Kraków section of Gazeta.pl.

    Multi-page articles and photo galleries are merged into one document.
    """

    title = u'Gazeta.pl Kraków'
    __author__ = 'teepel <teepel44@gmail.com> based on GW from fenuks'
    language = 'pl'
    description = u'Wiadomości z Krakowa na portalu Gazeta.pl.'
    category = 'newspaper'
    publication_type = 'newspaper'
    masthead_url = 'http://bi.gazeta.pl/im/5/8528/m8528105.gif'
    # Prefix for the relative "next page" links handled in append_page().
    INDEX = 'http://krakow.gazeta.pl/'
    remove_empty_feeds = True
    oldest_article = 1
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True

    keep_only_tags = [dict(name='div', attrs={'id': 'gazeta_article'})]

    remove_tags = [
        dict(name='div', attrs={'id': 'gazeta_article_likes'}),
        dict(name='div', attrs={'id': 'gazeta_article_tools'}),
        dict(name='div', attrs={'id': 'rel'}),
        dict(name='div', attrs={'id': 'gazeta_article_share'}),
        # Was name='u1', which is not an HTML element and can never match.
        dict(name='ul', attrs={'id': 'articleToolbar'}),
        dict(name='li', attrs={'class': 'atComments'}),
        dict(name='li', attrs={'class': 'atLicense'}),
        dict(name='div', attrs={'id': 'banP4'}),
        dict(name='div', attrs={'id': 'article_toolbar'}),
        dict(name='div', attrs={'id': 'gazeta_article_tags'}),
        dict(name='p', attrs={'class': 'txt_upl'}),
        dict(name='div', attrs={'class': 'gazeta_article_related_new'}),
        dict(name='div', attrs={'class': 'gazetaVideoPlayer'}),
        dict(name='div', attrs={'id': 'gazeta_article_miniatures'}),
        dict(name='div', attrs={'id': 'gazeta_article_buttons'}),
    ]

    remove_tags_after = [dict(name='div', attrs={'id': 'gazeta_article_share'})]

    feeds = [(u'Wiadomości', u'http://rss.gazeta.pl/pub/rss/krakow.xml')]

    def skip_ad_pages(self, soup):
        """Fetch the target of an <a class="btn"> link, if the page has one.

        :return: the raw content of the linked page, or None when the page
            has no such link
        """
        tag = soup.find(name='a', attrs={'class': 'btn'})
        if tag:
            return self.index_to_soup(tag['href'], raw=True)

    def append_page(self, soup, appendtag):
        """Append the text of all follow-up pages to ``appendtag``.

        The pager is the <div id="Str">; its link whose text contains
        u'następne' leads to the next page, whose #artykul element is
        appended. The pager and the #source element are removed from
        ``appendtag``. Links without plain text, and next pages lacking the
        pager or the article element, are tolerated instead of raising.
        """
        pager = soup.find('div', attrs={'id': 'Str'})
        embedded = appendtag.find('div', attrs={'id': 'Str'})
        links = []
        if embedded:
            if pager is not None:
                links = pager.findAll('a')
            embedded.extract()

        source = appendtag.find(id='source')
        if source:
            source.extract()

        while links:
            # link.string is None when the anchor wraps other tags.
            next_link = None
            for link in links:
                if link.string and u'następne' in link.string:
                    next_link = link
                    break  # one fetch per page even if several links match
            if next_link is None:
                break

            soup2 = self.index_to_soup(self.INDEX + next_link['href'])
            pagetext = soup2.find(id='artykul')
            if pagetext is not None:
                appendtag.insert(len(appendtag.contents), pagetext)
            pager = soup2.find('div', attrs={'id': 'Str'})
            links = pager.findAll('a') if pager is not None else []

    def _gallery_next_url(self, container):
        # href of the #gal_btn_next link inside ``container``, or None.
        button = container.find(id='gal_btn_next')
        if button and button.a is not None:
            return button.a['href']
        return None

    def gallery_article(self, appendtag):
        """Append every following gallery page (#container_gal) to ``appendtag``.

        The #gal_navi navigation block is removed after each step. The walk
        stops at the first page without a next button or without a gallery
        container.
        """
        if not appendtag.find(id='container_gal'):
            return

        nexturl = self._gallery_next_url(appendtag)
        navigation = appendtag.find(id='gal_navi')
        if navigation:
            navigation.extract()

        while nexturl:
            soup2 = self.index_to_soup(nexturl)
            pagetext = soup2.find(id='container_gal')
            if pagetext is None:
                break
            nexturl = self._gallery_next_url(pagetext)
            appendtag.insert(len(appendtag.contents), pagetext)
            navigation = appendtag.find(id='gal_navi')
            if navigation:
                navigation.extract()

    def preprocess_html(self, soup):
        """Merge follow-up pages and gallery pages into the article body."""
        self.append_page(soup, soup.body)
        if soup.find(id='container_gal'):
            self.gallery_article(soup.body)
        return soup
|
||||
|
99
recipes/gazeta_pl_warszawa.recipe
Normal file
@ -0,0 +1,99 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'teepel <teepel44@gmail.com> based on GW from fenuks'
|
||||
|
||||
'''
|
||||
warszawa.gazeta.pl
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class gw_wawa(BasicNewsRecipe):
    """Recipe for the Warsaw section of Gazeta.pl.

    Multi-page articles and photo galleries are merged into one document.
    """

    title = u'Gazeta.pl Warszawa'
    __author__ = 'teepel <teepel44@gmail.com> based on GW from fenuks'
    language = 'pl'
    # Unicode literal, like title and feeds: the text is non-ASCII.
    description = u'Wiadomości z Warszawy na portalu Gazeta.pl.'
    category = 'newspaper'
    publication_type = 'newspaper'
    masthead_url = 'http://bi.gazeta.pl/im/3/4089/m4089863.gif'
    # Prefix for the relative "next page" links handled in append_page().
    INDEX = 'http://warszawa.gazeta.pl/'
    remove_empty_feeds = True
    oldest_article = 1
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True

    keep_only_tags = [dict(name='div', attrs={'id': 'gazeta_article'})]

    remove_tags = [
        dict(name='div', attrs={'id': 'gazeta_article_likes'}),
        dict(name='div', attrs={'id': 'gazeta_article_tools'}),
        dict(name='div', attrs={'id': 'rel'}),
        dict(name='div', attrs={'id': 'gazeta_article_share'}),
        # Was name='u1', which is not an HTML element and can never match.
        dict(name='ul', attrs={'id': 'articleToolbar'}),
        dict(name='li', attrs={'class': 'atComments'}),
        dict(name='li', attrs={'class': 'atLicense'}),
        dict(name='div', attrs={'id': 'banP4'}),
        dict(name='div', attrs={'id': 'article_toolbar'}),
        dict(name='div', attrs={'id': 'gazeta_article_tags'}),
        dict(name='p', attrs={'class': 'txt_upl'}),
        dict(name='div', attrs={'class': 'gazeta_article_related_new'}),
        dict(name='div', attrs={'class': 'gazetaVideoPlayer'}),
        dict(name='div', attrs={'id': 'gazeta_article_miniatures'}),
    ]

    feeds = [(u'Wiadomości', u'http://rss.gazeta.pl/pub/rss/warszawa.xml')]

    def skip_ad_pages(self, soup):
        """Fetch the target of an <a class="btn"> link, if the page has one.

        :return: the raw content of the linked page, or None when the page
            has no such link
        """
        tag = soup.find(name='a', attrs={'class': 'btn'})
        if tag:
            return self.index_to_soup(tag['href'], raw=True)

    def append_page(self, soup, appendtag):
        """Append the text of all follow-up pages to ``appendtag``.

        The pager is the <div id="Str">; its link whose text contains
        u'następne' leads to the next page, whose #artykul element is
        appended. The pager and the #source element are removed from
        ``appendtag``. Links without plain text, and next pages lacking the
        pager or the article element, are tolerated instead of raising.
        """
        pager = soup.find('div', attrs={'id': 'Str'})
        embedded = appendtag.find('div', attrs={'id': 'Str'})
        links = []
        if embedded:
            if pager is not None:
                links = pager.findAll('a')
            embedded.extract()

        source = appendtag.find(id='source')
        if source:
            source.extract()

        while links:
            # link.string is None when the anchor wraps other tags.
            next_link = None
            for link in links:
                if link.string and u'następne' in link.string:
                    next_link = link
                    break  # one fetch per page even if several links match
            if next_link is None:
                break

            soup2 = self.index_to_soup(self.INDEX + next_link['href'])
            pagetext = soup2.find(id='artykul')
            if pagetext is not None:
                appendtag.insert(len(appendtag.contents), pagetext)
            pager = soup2.find('div', attrs={'id': 'Str'})
            links = pager.findAll('a') if pager is not None else []

    def _gallery_next_url(self, container):
        # href of the #gal_btn_next link inside ``container``, or None.
        button = container.find(id='gal_btn_next')
        if button and button.a is not None:
            return button.a['href']
        return None

    def gallery_article(self, appendtag):
        """Append every following gallery page (#container_gal) to ``appendtag``.

        The #gal_navi navigation block is removed after each step. The walk
        stops at the first page without a next button or without a gallery
        container.
        """
        if not appendtag.find(id='container_gal'):
            return

        nexturl = self._gallery_next_url(appendtag)
        navigation = appendtag.find(id='gal_navi')
        if navigation:
            navigation.extract()

        while nexturl:
            soup2 = self.index_to_soup(nexturl)
            pagetext = soup2.find(id='container_gal')
            if pagetext is None:
                break
            nexturl = self._gallery_next_url(pagetext)
            appendtag.insert(len(appendtag.contents), pagetext)
            navigation = appendtag.find(id='gal_navi')
            if navigation:
                navigation.extract()

    def preprocess_html(self, soup):
        """Merge follow-up pages and gallery pages into the article body."""
        self.append_page(soup, soup.body)
        if soup.find(id='container_gal'):
            self.gallery_article(soup.body)
        return soup
|
||||
|
@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Gazeta_Wyborcza(BasicNewsRecipe):
|
||||
title = u'Gazeta Wyborcza'
|
||||
title = u'Gazeta.pl'
|
||||
__author__ = 'fenuks, Artur Stachecki'
|
||||
language = 'pl'
|
||||
description = 'news from gazeta.pl'
|
||||
|
30
recipes/house_news.recipe
Normal file
@ -0,0 +1,30 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Eddie Lau'
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipeHouseNews(BasicNewsRecipe):
    """Recipe for the House News (thehousenews.com) 'Latest' feed."""

    # --- identification ---
    title = u'House News \u4e3b\u5834\u65b0\u805e'
    __author__ = 'Eddie Lau'
    publisher = 'House News'
    description = 'http://thehousenews.com'
    category = 'Chinese, Blogs, Opinion, News, Hong Kong'
    language = 'zh'
    encoding = 'utf-8'
    masthead_url = 'http://thehousenews.com/static/images/housebeta.jpg'

    # --- feed selection ---
    oldest_article = 1
    max_articles_per_feed = 100
    feeds = [(u'Latest', u'http://thehousenews.com/rss/')]

    # --- page clean-up and styling ---
    auto_cleanup = False
    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} p[class=date] {font-size:50%;} div[class=author] {font-size:75%;} p[class=caption] {font-size:50%;}'
    keep_only_tags = [
        dict(name='h1'),
        dict(name='div', attrs={'class':['photo']}),
        dict(name='p', attrs={'class':'caption'}),
        dict(name='div', attrs={'class':'articleTextWrap'}),
        dict(name='div', attrs={'class':['author']}),
        dict(name='p', attrs={'class':'date'}),
    ]

    def populate_article_metadata(self, article, soup, first):
        """Use the first <img> of an article's first page as its TOC thumbnail.

        Does nothing when ``first`` is false, when this recipe object has no
        ``add_toc_thumbnail`` attribute, or when the page has no image.
        """
        if not first or not hasattr(self, 'add_toc_thumbnail'):
            return
        image = soup.find('img')
        if image is None:
            return
        self.add_toc_thumbnail(article,image['src'])
|
Before Width: | Height: | Size: 1.6 KiB After Width: | Height: | Size: 878 B |
BIN
recipes/icons/antyweb.png
Normal file
After Width: | Height: | Size: 668 B |
BIN
recipes/icons/app_funds.png
Normal file
After Width: | Height: | Size: 471 B |
BIN
recipes/icons/autosport.png
Normal file
After Width: | Height: | Size: 415 B |
BIN
recipes/icons/bankier_pl.png
Normal file
After Width: | Height: | Size: 190 B |
BIN
recipes/icons/blognexto.png
Normal file
After Width: | Height: | Size: 699 B |
BIN
recipes/icons/brewiarz.png
Normal file
After Width: | Height: | Size: 982 B |
Before Width: | Height: | Size: 290 B After Width: | Height: | Size: 786 B |
BIN
recipes/icons/ekundelek_pl.png
Normal file
After Width: | Height: | Size: 536 B |
BIN
recipes/icons/f1_ultra.png
Normal file
After Width: | Height: | Size: 490 B |
BIN
recipes/icons/gazeta_pl_krakow.png
Normal file
After Width: | Height: | Size: 802 B |
BIN
recipes/icons/gazeta_pl_szczecin.png
Normal file
After Width: | Height: | Size: 802 B |
BIN
recipes/icons/gazeta_pl_warszawa.png
Normal file
After Width: | Height: | Size: 802 B |
Before Width: | Height: | Size: 221 B After Width: | Height: | Size: 802 B |
BIN
recipes/icons/gosc_niedzielny.png
Normal file
After Width: | Height: | Size: 588 B |
BIN
recipes/icons/kp.png
Normal file
After Width: | Height: | Size: 485 B |
BIN
recipes/icons/mateusz_czytania.png
Normal file
After Width: | Height: | Size: 1.1 KiB |
BIN
recipes/icons/myapple_pl.png
Normal file
After Width: | Height: | Size: 1.1 KiB |
BIN
recipes/icons/naszdziennik.png
Normal file
After Width: | Height: | Size: 698 B |
BIN
recipes/icons/prawica_net.png
Normal file
After Width: | Height: | Size: 609 B |
BIN
recipes/icons/rushisaband.png
Normal file
After Width: | Height: | Size: 965 B |
BIN
recipes/icons/rynek_infrastruktury.png
Normal file
After Width: | Height: | Size: 820 B |
BIN
recipes/icons/rynek_kolejowy.png
Normal file
After Width: | Height: | Size: 330 B |
BIN
recipes/icons/samcik_blox.png
Normal file
After Width: | Height: | Size: 1.1 KiB |
BIN
recipes/icons/satkurier.png
Normal file
After Width: | Height: | Size: 1.2 KiB |
BIN
recipes/icons/telepolis_pl.png
Normal file
After Width: | Height: | Size: 1.2 KiB |
BIN
recipes/icons/wprost.png
Normal file
After Width: | Height: | Size: 1.7 KiB |
36
recipes/ihned.cz.recipe
Normal file
@ -0,0 +1,36 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import unicode_literals
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class ihnedRecipe(BasicNewsRecipe):
    """Recipe for the iHNed.cz feeds (news, HN, business, life, dialog)."""

    # --- identification ---
    __author__ = 'bubak'
    title = u'iHNed.cz'
    publisher = u''
    description = 'ihned.cz'
    language = 'cs'
    cover_url = 'http://rss.ihned.cz/img/0/0_hp09/ihned.cz.gif'
    #encoding = 'iso-8859-2'

    # --- feed selection ---
    oldest_article = 1
    max_articles_per_feed = 20
    use_embedded_content = False
    feeds = [
        (u'Zprávy', u'http://zpravy.ihned.cz/?m=rss'),
        (u'Hospodářské noviny', u'http://hn.ihned.cz/?p=500000_rss'),
        (u'Byznys', u'http://byznys.ihned.cz/?m=rss'),
        (u'Life', u'http://life.ihned.cz/?m=rss'),
        (u'Dialog', u'http://dialog.ihned.cz/?m=rss')
    ]

    # --- page clean-up ---
    no_stylesheets = True
    remove_javascript = True
    extra_css = """
    """
    remove_attributes = []
    remove_tags_before = dict(name='div', attrs={'id':['heading']})
    remove_tags_after = dict(name='div', attrs={'id':['next-authors']})
    remove_tags = [
        dict(name='ul', attrs={'id':['comm']}),
        dict(name='div', attrs={'id':['r-big']}),
        dict(name='div', attrs={'class':['tools tools-top']}),
    ]
|
59
recipes/insider.recipe
Normal file
@ -0,0 +1,59 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class insider(BasicNewsRecipe):
    """Calibre recipe for the subscription-only site denikinsider.cz.

    The recipe logs in with the user's credentials and scrapes the article
    list from the home page instead of using RSS feeds.
    """

    __author__ = 'bubak'
    title = 'Insider'
    language = 'cs'

    remove_tags = [
        dict(name='div', attrs={'class':'article-related-content'}),
        dict(name='div', attrs={'class':'calendar'}),
        dict(name='span', attrs={'id':'labelHolder'}),
    ]

    no_stylesheets = True
    keep_only_tags = [dict(name='div', attrs={'class':['doubleBlock textContentFormat']})]

    # Cut the page off at the first "T.mata:" marker (any character in place
    # of the dot) and close the body there.
    preprocess_regexps = [(re.compile(r'T.mata:.*', re.DOTALL|re.IGNORECASE), lambda m: '</body>')]
    needs_subscription = True

    def get_browser(self):
        """Return a browser that is logged in to denikinsider.cz.

        Raises:
            ValueError: if the page returned after submitting the login form
                does not contain the logout link text.
        """
        # NOTE(review): newer calibre releases define get_browser as an
        # instance method and expect BasicNewsRecipe.get_browser(self);
        # confirm against the calibre version this recipe targets.
        br = BasicNewsRecipe.get_browser()
        br.open('http://www.denikinsider.cz/')
        br.select_form(nr=0)
        br['login-name'] = self.username
        br['login-password'] = self.password
        res = br.submit()
        raw = res.read()
        # Compare text with text: decode the response explicitly rather than
        # relying on an implicit byte/unicode coercion.
        # NOTE(review): assumes the site serves UTF-8 - confirm.
        if isinstance(raw, bytes):
            raw = raw.decode('utf-8', 'replace')
        if u'Odhlásit se' not in raw:
            raise ValueError('Failed to login to insider.cz. '
                             'Check your username and password.')
        return br

    def parse_index(self):
        """Scrape the home page and return one feed holding all articles.

        Returns:
            A single-element list ``[(self.title, articles)]`` where each
            article is a dict with 'title', 'url', 'description' and 'date'.

        Raises:
            ValueError: if no article title elements are found on the page.
        """
        soup = self.index_to_soup('http://www.denikinsider.cz')
        titles = soup.findAll('span', attrs={'class':'homepageArticleTitle'})
        # findAll returns an empty list (never None) when nothing matches.
        if not titles:
            raise ValueError('Could not find category content')

        articles = []
        seen_titles = set()
        for title in titles:
            # tag_to_string also copes with titles that contain nested markup,
            # where .string would be None.
            text = self.tag_to_string(title)
            if text in seen_titles:
                continue
            seen_titles.add(text)
            # The enclosing element carries the link to the article.
            url = title.parent['href']
            if url.startswith('/'):
                # Site-relative link: prepend the host without doubling the slash.
                url = 'http://www.denikinsider.cz' + url
            self.log('\tFound article:', text, 'at', url)
            articles.append({'title':text, 'url':url, 'description':'',
                             'date':''})
        return [(self.title, articles)]
|
||||
|
||||
|
34
recipes/kerrang.recipe
Normal file
@ -0,0 +1,34 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class kerrang(BasicNewsRecipe):
    """Calibre recipe for the news feed of kerrang.com."""

    # --- Identification ---------------------------------------------------
    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
    title = u'Kerrang!'
    description = u'UK-based magazine devoted to rock music published by Bauer Media Group'
    language = 'en_GB'
    masthead_url = 'http://images.kerrang.com/design/kerrang/kerrangsite/logo.gif'

    # --- Download behaviour -----------------------------------------------
    feeds = [(u'News', u'http://www.kerrang.com/blog/rss.xml')]
    oldest_article = 7
    max_articles_per_feed = 100
    simultaneous_downloads = 5
    use_embedded_content = False
    recursions = 0

    # --- Page clean-up ----------------------------------------------------
    remove_javascript = True
    no_stylesheets = True
    keep_only_tags = [dict(attrs = {'class' : ['headz', 'blktxt']})]

    extra_css = ''' img { display: block; margin-right: auto;}
    h1 {text-align: left; font-size: 22px;}'''

    def preprocess_html(self, soup):
        """Replace every text-only <a> element with its plain text.

        Anchors whose ``.string`` is None are left untouched.
        Returns the same soup object, modified in place.
        """
        for anchor in soup.findAll('a'):
            text = anchor.string
            if text is None:
                continue
            anchor.replaceWith(text)
        return soup
|
52
recipes/kp.recipe
Normal file
@ -0,0 +1,52 @@
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class KrytykaPolitycznaRecipe(BasicNewsRecipe):
    """Calibre recipe for krytykapolityczna.pl, using the site's print pages."""

    # --- Identification ---------------------------------------------------
    __license__ = 'GPL v3'
    __author__ = u'intromatyk <intromatyk@gmail.com>'
    version = 1
    title = u'Krytyka Polityczna'
    category = u'News'
    description = u' Lewicowe pismo zaangażowane w bieg spraw publicznych w Polsce.'
    language = 'pl'
    cover_url = ''

    # --- Download behaviour -----------------------------------------------
    feeds = [
        ('Wszystkie', 'http://www.krytykapolityczna.pl/rss.xml')
    ]
    oldest_article = 7
    max_articles_per_feed = 100000
    recursions = 0
    simultaneous_downloads = 3
    remove_empty_feeds = True

    # --- Page clean-up ----------------------------------------------------
    no_stylesheets = True
    remove_javascript = True

    # Only the title and body of the print view are kept.
    keep_only_tags = [
        dict(name = 'h1', attrs = {'class' : 'print-title'}),
        dict(name = 'div', attrs = {'class' : 'print-content'}),
    ]

    remove_tags = [
        dict(attrs = {'class' : ['field field-type-text field-field-story-switch', 'field field-type-filefield field-field-story-temp' , 'field field-type-text field-field-story-author', 'field field-type-text field-field-story-lead-switch']}),
    ]

    extra_css = '''
    body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
    td.contentheading{font-size: large; font-weight: bold;}
    '''

    def print_version(self, url):
        """Return the URL of the print view linked from the article at *url*.

        The article page is fetched and the ``href`` of the element with
        class 'print-page' is appended to the site root.
        """
        page = self.index_to_soup(url)
        print_link = page.find(attrs = {'class' : 'print-page'})
        print_uri = print_link['href']
        self.log('PRINT', print_uri)
        return 'http://www.krytykapolityczna.pl/' + print_uri

    def preprocess_html(self, soup):
        """Replace every text-only <a> element with its plain text."""
        for anchor in soup.findAll('a'):
            text = anchor.string
            if text is None:
                continue
            anchor.replaceWith(text)
        return soup
|
32
recipes/kudy_z_nudy.recipe
Normal file
@ -0,0 +1,32 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import unicode_literals
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class kudyznudyRecipe(BasicNewsRecipe):
    """Calibre recipe for the "newest" chart feed of kudyznudy.cz."""

    # --- Identification ---------------------------------------------------
    title = u'Kudy z nudy'
    __author__ = 'bubak'
    publisher = u''
    description = 'kudyznudy.cz'
    language = 'cs'
    cover_url = 'http://www.kudyznudy.cz/App_Themes/KzN/Images/Containers/Header/HeaderLogoKZN.png'
    # The author left an explicit encoding override disabled:
    #encoding = 'iso-8859-2'

    # --- Feed selection ---------------------------------------------------
    oldest_article = 3
    max_articles_per_feed = 20
    use_embedded_content = False
    feeds = [
        (u'Praha nejnovější', u'http://www.kudyznudy.cz/RSS/Charts.aspx?Type=Newest&Lang=cs-CZ&RegionId=1')
    ]

    # --- Page clean-up ----------------------------------------------------
    remove_javascript = True
    no_stylesheets = True
    extra_css = """
    """
    remove_attributes = []
    keep_only_tags = []

    # Trim everything outside the main content wrapper, then remove the
    # remaining auxiliary boxes.
    remove_tags_before = dict(name='div', attrs={'class':['C_WholeContentPadding']})
    remove_tags_after = dict(name='div', attrs={'class':['SurroundingsContainer']})
    remove_tags = [
        dict(name='div', attrs={'class':['Details', 'buttons', 'SurroundingsContainer', 'breadcrumb']}),
    ]
|
45
recipes/lequipe.recipe
Normal file
@ -0,0 +1,45 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class leequipe(BasicNewsRecipe):
    """Calibre recipe for the per-sport RSS feeds of lequipe.fr."""

    # --- Identification ---------------------------------------------------
    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
    title = u'l\'equipe'
    description = u'Retrouvez tout le sport en direct sur le site de L\'EQUIPE et suivez l\'actualité du football, rugby, basket, cyclisme, f1, volley, hand, tous les résultats sportifs'
    language = 'fr'
    masthead_url = 'http://static.lequipe.fr/v6/img/logo-lequipe.png'

    # --- Download behaviour -----------------------------------------------
    oldest_article = 1
    max_articles_per_feed = 100
    simultaneous_downloads = 5
    use_embedded_content = False
    recursions = 0

    feeds = [
        (u'Football', u'http://www.lequipe.fr/rss/actu_rss_Football.xml'),
        (u'Auto-Moto', u'http://www.lequipe.fr/rss/actu_rss_Auto-Moto.xml'),
        (u'Tennis', u'http://www.lequipe.fr/rss/actu_rss_Tennis.xml'),
        (u'Golf', u'http://www.lequipe.fr/rss/actu_rss_Golf.xml'),
        (u'Rugby', u'http://www.lequipe.fr/rss/actu_rss_Rugby.xml'),
        (u'Basket', u'http://www.lequipe.fr/rss/actu_rss_Basket.xml'),
        (u'Hand', u'http://www.lequipe.fr/rss/actu_rss_Hand.xml'),
        (u'Cyclisme', u'http://www.lequipe.fr/rss/actu_rss_Cyclisme.xml'),
        (u'Autres Sports', u'http://pipes.yahoo.com/pipes/pipe.run?_id=2039f7f4f350c70c5e4e8633aa1b37cd&_render=rss')
    ]

    # --- Page clean-up ----------------------------------------------------
    remove_javascript = True
    no_stylesheets = True

    keep_only_tags = [dict(attrs={'id': ['article']})]
    remove_tags = [
        dict(attrs={'id': ['partage', 'ensavoirplus', 'bloc_bas_breve', 'commentaires', 'tools']}),
        dict(attrs={'class': ['partage_bis', 'date']}),
    ]

    def preprocess_html(self, soup):
        """Replace every text-only <a> element with its plain text."""
        for anchor in soup.findAll('a'):
            text = anchor.string
            if text is None:
                continue
            anchor.replaceWith(text)
        return soup
|
40
recipes/lidovky.recipe
Normal file
@ -0,0 +1,40 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import unicode_literals
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
class lnRecipe(BasicNewsRecipe):
    """Calibre recipe for the section RSS exports of lidovky.cz."""

    # --- Identification ---------------------------------------------------
    title = u'lidovky'
    __author__ = 'bubak'
    publisher = u''
    description = 'lidovky.cz'
    language = 'cs'
    cover_url = 'http://g.lidovky.cz/o/lidovky_ln3b/lidovky-logo.png'
    # The author left an explicit encoding override disabled:
    #encoding = 'iso-8859-2'

    # --- Feed selection ---------------------------------------------------
    oldest_article = 1
    max_articles_per_feed = 20
    use_embedded_content = False
    feeds = [
        (u'Události', u'http://www.lidovky.cz/export/rss.asp?r=ln_domov'),
        (u'Svět', u'http://www.lidovky.cz/export/rss.asp?r=ln_zahranici'),
        (u'Byznys', u'http://www.lidovky.cz/export/rss.asp?c=ln_byznys'),
        (u'Věda', u'http://www.lidovky.cz/export/rss.asp?r=ln_veda'),
        (u'Názory', u'http://www.lidovky.cz/export/rss.asp?r=ln_nazory'),
        (u'Relax', u'http://www.lidovky.cz/export/rss.asp?c=ln_relax')
    ]

    # --- Page clean-up ----------------------------------------------------
    remove_javascript = True
    no_stylesheets = True
    remove_attributes = []
    keep_only_tags = []
    remove_tags_before = dict(name='div', attrs={'id':['content']})
    remove_tags_after = dict(name='div', attrs={'class':['authors']})

    # Everything from the "fb-root" div to the end of the document is
    # replaced by a closing </body> tag.
    preprocess_regexps = [
        (re.compile(r'<div id="(fb-root)".*', re.DOTALL|re.IGNORECASE),
         lambda match: '</body>'),
    ]
|
||||
|
||||
|
||||
|
||||
|
||||
|
36
recipes/mateusz_czytania.recipe
Normal file
@ -0,0 +1,36 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'teepel <teepel44@gmail.com>'
|
||||
|
||||
'''
|
||||
http://www.mateusz.pl/czytania
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class czytania_mateusz(BasicNewsRecipe):
    """Calibre recipe for the daily readings feed of mateusz.pl."""

    title = u'Czytania na ka\u017cdy dzie\u0144'
    __author__ = 'teepel <teepel44@gmail.com>'
    description = u'Codzienne czytania z jednego z najstarszych polskich serwisów katolickich.'
    language = 'pl'
    INDEX='http://www.mateusz.pl/czytania'
    oldest_article = 1
    remove_empty_feeds= True
    no_stylesheets=True
    auto_cleanup = True
    remove_javascript = True
    simultaneous_downloads = 2
    max_articles_per_feed = 100

    feeds = [(u'Czytania', u'http://mateusz.pl/rss/czytania/')]

    remove_tags = [dict(name = 'p', attrs = {'class' : 'top'})]

    #thanks t3d
    def get_article_url(self, article):
        """Return the article link, or None for entries that must be skipped.

        Entries whose link contains 'kmt.pl' are rejected by returning None,
        as are entries that carry no link at all (previously a missing link
        raised TypeError on the membership test).
        """
        link = article.get('link')
        if link and 'kmt.pl' not in link:
            return link
        return None
|
29
recipes/metropol_tv.recipe
Normal file
@ -0,0 +1,29 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import unicode_literals
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class metropolRecipe(BasicNewsRecipe):
    """Calibre recipe for the RSS feed of metropol.cz (Metropol TV)."""

    # --- Identification ---------------------------------------------------
    title = u'Metropol TV'
    __author__ = 'bubak'
    publisher = u''
    description = 'metropol.cz'
    language = 'cs'
    cover_url = 'http://www.metropol.cz/public/css/../images/logo/metropoltv.png'
    # The author left an explicit encoding override disabled:
    #encoding = 'iso-8859-2'

    # --- Feed selection ---------------------------------------------------
    oldest_article = 1
    max_articles_per_feed = 20
    use_embedded_content = False
    feeds = [
        (u'Metropolcv.cz', u'http://www.metropol.cz/rss/')
    ]

    # --- Page clean-up ----------------------------------------------------
    remove_javascript = True
    no_stylesheets = True
    extra_css = """
    """
    remove_attributes = []

    # Only the full-article container is kept.
    keep_only_tags = [dict(name='div', attrs={'id':['art-full']})]
|
49
recipes/myapple_pl.recipe
Normal file
@ -0,0 +1,49 @@
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class MyAppleRecipe(BasicNewsRecipe):
    """Calibre recipe for the news feed of MyApple.pl."""

    __license__ = 'GPL v3'
    __author__ = u'Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
    version = 1

    title = u'MyApple.pl'
    category = u'News'
    description = u' Największy w Polsce serwis zajmujący się tematyką związaną z Apple i wszelkimi produktami tej firmy.'
    cover_url=''
    remove_empty_feeds= True
    no_stylesheets=True
    oldest_article = 7
    max_articles_per_feed = 100000
    recursions = 0

    remove_javascript = True
    simultaneous_downloads = 3

    # Keep only the article body, then strip the byline, wide banners,
    # CMS links, ads and the comment section from it.
    keep_only_tags = [dict(name = 'div', attrs = {'id' : 'article_content'})]

    remove_tags = [
        dict(name = 'div', attrs = {'class' : 'article_author_date_comment_container'}),
        dict(name = 'div', attrs = {'class' : 'fullwidth'}),
        dict(name = 'div', attrs = {'class' : 'cmslinks'}),
        dict(name = 'div', attrs = {'class' : 'googleads-468'}),
        dict(name = 'div', attrs = {'id' : 'comments'}),
    ]

    extra_css = '''
    body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
    td.contentheading{font-size: large; font-weight: bold;}
    '''

    feeds = [
        # The query parameter is "&sectionid"; it had been mangled into a
        # section-sign character by HTML entity decoding of "&sect".
        ('News', 'feed://myapple.pl/external.php?do=rss&type=newcontent&sectionid=1&days=120&count=10'),
    ]

    def preprocess_html(self, soup):
        """Replace every text-only <a> element with its plain text."""
        for alink in soup.findAll('a'):
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
        return soup
|
30
recipes/nadacni_fond_proti_korupci.recipe
Normal file
@ -0,0 +1,30 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import unicode_literals
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class nfpkRecipe(BasicNewsRecipe):
    """Calibre recipe for the news feed of nfpk.cz."""

    # --- Identification ---------------------------------------------------
    title = u'Nadační fond proti korupci'
    __author__ = 'bubak'
    publisher = u''
    description = 'nfpk.cz'
    language = 'cs'
    cover_url = 'http://www.nfpk.cz/_templates/nfpk/_images/logo.gif'
    # The author left an explicit encoding override disabled:
    #encoding = 'iso-8859-2'

    # --- Feed selection ---------------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 20
    use_embedded_content = False
    remove_empty_feeds = True
    feeds = [
        (u'Aktuality', u'http://feeds.feedburner.com/nfpk')
    ]

    # --- Page clean-up ----------------------------------------------------
    remove_javascript = True
    no_stylesheets = True
    extra_css = """
    """
    remove_attributes = []

    # Only the main content container is kept.
    keep_only_tags = [dict(name='div', attrs={'id':'content'})]
|
||||
|
61
recipes/naszdziennik.recipe
Normal file
@ -0,0 +1,61 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class naszdziennik(BasicNewsRecipe):
    """Calibre recipe for naszdziennik.pl that scrapes the news index page."""

    title = u'Nasz Dziennik'
    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
    description =u'Nasz Dziennik - Ogólnopolska gazeta codzienna. Podejmuje tematykę dotyczącą życia społecznego, kulturalnego, politycznego i religijnego. Propaguje wartości chrześcijańskie oraz tradycję i kulturę polską.'
    masthead_url='http://www.naszdziennik.pl/images/logo-male.png'
    max_articles_per_feed = 100
    remove_javascript=True
    no_stylesheets = True

    keep_only_tags =[dict(attrs = {'id' : 'article'})]

    def parse_index(self):
        """Build the feed list from http://www.naszdziennik.pl/news.

        Every element with class 'news-article' contributes one article. The
        text of its first <h4> names the section, and sections are emitted in
        the order in which they first appear on the page.

        Returns:
            A list of ``(section_name, articles)`` tuples, each article being
            a dict with 'title', 'url' and 'date'.
        """
        soup = self.index_to_soup('http://www.naszdziennik.pl/news')
        feeds = []
        # section name -> list of article dicts
        articles = {}
        # section names in order of first appearance
        sections = []

        for item in soup.findAll(attrs = {'class' : 'news-article'}):
            # The title/date wrapper holds both the link and the date.
            article_title_datetime = item.find(attrs = {'class' : 'title-datetime'})
            article_a = None
            if article_title_datetime is not None:
                article_a = article_title_datetime.find('a')
            if article_a is None:
                # No link to follow; skip the entry rather than crash.
                continue

            # The first <h4> inside the item is the section heading.
            section = self.tag_to_string(item.find('h4'))
            if section not in articles:
                sections.append(section)
                articles[section] = []

            # Links on the index page are site-relative.
            article_url = 'http://naszdziennik.pl' + article_a['href']
            article_title = self.tag_to_string(article_a)
            # The date is the first <h4> inside the title/date wrapper.
            article_date = self.tag_to_string(article_title_datetime.find('h4'))
            articles[section].append( { 'title' : article_title, 'url' : article_url, 'date' : article_date })

        # Emit the sections in page order.
        for section in sections:
            feeds.append((section, articles[section]))
        return feeds
|
56
recipes/nepszabadsag.recipe
Normal file
@ -0,0 +1,56 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import unicode_literals
|
||||
'''
|
||||
Fetch Népszabadság
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class nepszabadsag(BasicNewsRecipe):
    """Calibre recipe for the domestic news feed of nol.hu."""

    # --- Identification ---------------------------------------------------
    __author__ = 'bubak'
    title = u'N\u00e9pszabads\u00e1g'
    description = ''
    language = 'hu'
    cover_url = 'http://nol.hu/_design/image/logo_nol_live.jpg'
    timefmt = ' [%d %b %Y]'

    # --- Download behaviour -----------------------------------------------
    use_embedded_content = False
    oldest_article = 2
    max_articles_per_feed = 20
    simultaneous_downloads = 5
    # Settings the author left disabled:
    #delay = 1
    #timeout = 10
    #encoding = 'utf-8'

    feeds = [
        (u'Belföld', u'http://nol.hu/feed/belfold.rss')
        #,(u'Külföld', u'http://nol.hu/feed/kulfold.rss')
        #,(u'Gazdaság', u'http://nol.hu/feed/gazdasag.rss')
        #,(u'Kultúra', u'http://nol.hu/feed/kult.rss')
    ]

    # --- Page clean-up ----------------------------------------------------
    no_stylesheets = True
    remove_javascript = True
    extra_css = '''
    '''
    remove_attributes = []
    keep_only_tags = [dict(name='table', attrs={'class':'article-box'})]
    remove_tags_before = dict(name='div', attrs={'class':['d-source']})
    remove_tags_after = dict(name='div', attrs={'class':['tags']})
    remove_tags = [
        dict(name='div', attrs={'class':['h']}),
        dict(name='tfoot'),
    ]

    # NS sends an ad page sometimes but not frequently enough, TBD
    def AAskip_ad_pages(self, soup):
        """Follow the first link of an advertisement page.

        If the page title contains 'advertisement', the target of the first
        <a> is downloaded and returned as decoded text; otherwise None is
        returned.
        """
        page_title = soup.find('title').string.lower()
        if 'advertisement' not in page_title:
            return None
        href = soup.find('a').get('href')
        self.log.debug('Skipping to: ' + href)
        new = self.browser.open(href).read().decode('utf-8', 'ignore')
        #ipython(locals())
        self.log.debug('Finished: ' + href)
        return new
|
||||
|
32
recipes/neviditelny_pes.recipe
Normal file
@ -0,0 +1,32 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import unicode_literals
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class pesRecipe(BasicNewsRecipe):
    """Calibre recipe for the RSS export of neviditelnypes.lidovky.cz."""

    # --- Identification ---------------------------------------------------
    title = u'Neviditelný pes'
    __author__ = 'bubak'
    publisher = u''
    description = u'Neviditelný pes'
    language = 'cs'
    cover_url = 'http://g.zpravy.cz/o/pes/logo_pes.jpg'
    # The author left an explicit encoding override disabled:
    #encoding = 'iso-8859-2'

    # --- Feed selection ---------------------------------------------------
    oldest_article = 1
    max_articles_per_feed = 20
    use_embedded_content = False
    remove_empty_feeds = True
    feeds = [
        (u'Neviditelný pes', u'http://neviditelnypes.lidovky.cz/export/rss.asp?c=pes_neviditelny')
    ]

    # --- Page clean-up ----------------------------------------------------
    remove_javascript = True
    no_stylesheets = True
    extra_css = """
    """
    remove_tags = []

    # Keep the span from the full-article block to the authors block.
    remove_tags_before = dict(name='div', attrs={'id':'art-full'})
    remove_tags_after = dict(name='div', attrs={'id':'authors'})
|
||||
|
||||
|
@ -22,9 +22,9 @@ class NewYorker(BasicNewsRecipe):
|
||||
masthead_url = 'http://www.newyorker.com/css/i/hed/logo.gif'
|
||||
extra_css = """
|
||||
body {font-family: "Times New Roman",Times,serif}
|
||||
.articleauthor{color: #9F9F9F;
|
||||
.articleauthor{color: #9F9F9F;
|
||||
font-family: Arial, sans-serif;
|
||||
font-size: small;
|
||||
font-size: small;
|
||||
text-transform: uppercase}
|
||||
.rubric,.dd,h6#credit{color: #CD0021;
|
||||
font-family: Arial, sans-serif;
|
||||
@ -63,11 +63,11 @@ class NewYorker(BasicNewsRecipe):
|
||||
return url.strip()
|
||||
|
||||
def get_cover_url(self):
|
||||
cover_url = None
|
||||
soup = self.index_to_soup('http://www.newyorker.com/magazine/toc/')
|
||||
cover_item = soup.find('img',attrs={'id':'inThisIssuePhoto'})
|
||||
cover_url = "http://www.newyorker.com/images/covers/1925/1925_02_21_p233.jpg"
|
||||
soup = self.index_to_soup('http://www.newyorker.com/magazine?intcid=magazine')
|
||||
cover_item = soup.find('div',attrs={'id':'media-count-1'})
|
||||
if cover_item:
|
||||
cover_url = 'http://www.newyorker.com' + cover_item['src'].strip()
|
||||
cover_url = 'http://www.newyorker.com' + cover_item.div.img['src'].strip()
|
||||
return cover_url
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
|
@ -13,7 +13,7 @@ import datetime
|
||||
class Newsweek(BasicNewsRecipe):
|
||||
|
||||
# how many issues to go back, 0 means get the most current one
|
||||
BACK_ISSUES = 1
|
||||
BACK_ISSUES = 2
|
||||
|
||||
EDITION = '0'
|
||||
DATE = None
|
||||
|
50
recipes/novinky.cz.recipe
Normal file
@ -0,0 +1,50 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import unicode_literals
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class novinkyRecipe(BasicNewsRecipe):
    """Calibre recipe for novinky.cz that drops articles already downloaded."""

    # --- Identification ---------------------------------------------------
    title = u'novinky.cz'
    __author__ = 'bubak'
    publisher = u'seznam.cz'
    description = 'novinky.cz'
    language = 'cs'
    cover_url = 'http://www.novinky.cz/static/images/logo.gif'
    #encoding = 'utf-8'

    # --- Feed selection ---------------------------------------------------
    oldest_article = 1
    max_articles_per_feed = 20
    feeds = [
        (u'Domácí', u'http://www.novinky.cz/rss2/domaci/'),
        (u'Praha', u'http://www.novinky.cz/rss2/vase-zpravy/praha/'),
        (u'Ekonomika', u'http://www.novinky.cz/rss2/ekonomika/'),
        (u'Finance', u'http://www.novinky.cz/rss2/finance/'),
    ]

    # --- Page clean-up ----------------------------------------------------
    remove_javascript = True
    no_stylesheets = True
    keep_only_tags = []
    remove_tags_before = dict(name='div',attrs={'class':['articleHeader']})
    remove_tags_after = dict(name='div',attrs={'class':'related'})
    remove_tags = [
        dict(name='div', attrs={'id':['pictureInnerBox']}),
        dict(name='div', attrs={'id':['discussionEntry']}),
        dict(name='span', attrs={'id':['mynews-hits', 'mynews-author']}),
        dict(name='div', attrs={'class':['related']}),
        dict(name='div', attrs={'id':['multimediaInfo']}),
    ]

    # This source has identical articles under different links
    # which are redirected to the common url. I've found
    # just this API method that has the real URL
    visited_urls = {}

    def encoding(self, source):
        """Decode a downloaded page, returning None for URLs seen before.

        ``source.newurl`` is the key used for de-duplication; it is recorded
        in the class-level ``visited_urls`` dict on first sight.
        """
        url = source.newurl
        if url in self.visited_urls:
            self.log.debug('Ignoring duplicate: ' + url)
            return None
        self.visited_urls[url] = True
        self.log.debug('Accepting: ' + url)
        return source.decode('utf-8', 'replace')
|
||||
|
38
recipes/parlamentni_listy.recipe
Normal file
@ -0,0 +1,38 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import unicode_literals
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
class plRecipe(BasicNewsRecipe):
    """Calibre recipe for the RSS export of parlamentnilisty.cz."""

    # --- Identification ---------------------------------------------------
    title = u'Parlamentn\u00ed Listy'
    __author__ = 'bubak'
    publisher = u''
    description = ''
    language = 'cs'
    cover_url = 'http://www.parlamentnilisty.cz/design/listy-logo2.png'
    # The author left an explicit encoding override disabled:
    #encoding = 'iso-8859-2'

    # --- Feed selection ---------------------------------------------------
    oldest_article = 1
    max_articles_per_feed = 20
    use_embedded_content = False
    feeds = [
        (u'Parlamentní listy.cz', u'http://www.parlamentnilisty.cz/export/rss.aspx')
    ]

    # --- Page clean-up ----------------------------------------------------
    remove_javascript = True
    no_stylesheets = True
    remove_attributes = []
    keep_only_tags = [dict(name='div', attrs={'class':['article-detail']})]
    remove_tags = [
        dict(name='div', attrs={'class':['articledetailboxin','crumbs', 'relatedarticles articledetailbox']}),
        dict(name='div', attrs={'class':['socialshare-1 noprint', 'socialshare-2 noprint']}),
        dict(name='div', attrs={'id':'widget'}),
        dict(name='div', attrs={'class':'article-discussion-box noprint'}),
    ]

    # Everything from the "Ptejte se politik..." span/strong element to the
    # end of the document is replaced by a closing </body> tag.
    preprocess_regexps = [
        (re.compile(r'<(span|strong)[^>]*>\s*Ptejte se politik.*', re.DOTALL|re.IGNORECASE),
         lambda match: '</body>'),
    ]
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
40
recipes/piratska_strana.recipe
Normal file
@ -0,0 +1,40 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import unicode_literals
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class cpsRecipe(BasicNewsRecipe):
    """Calibre recipe for the article feed of pirati.cz."""

    # --- Identification ---------------------------------------------------
    title = u'Piratská strana'
    __author__ = 'bubak'
    publisher = u''
    description = ''
    language = 'cs'
    cover_url = 'http://www.pirati.cz/sites/all/themes/addari-cps/images/headbg.jpg'
    # The author left an explicit encoding override disabled:
    #encoding = 'iso-8859-2'

    # --- Feed selection ---------------------------------------------------
    oldest_article = 3
    max_articles_per_feed = 20
    use_embedded_content = False
    remove_empty_feeds = True
    feeds = [
        (u'Články', u'http://www.pirati.cz/rss.xml')
    ]

    # --- Page clean-up ----------------------------------------------------
    remove_javascript = True
    no_stylesheets = True
    extra_css = """
    """
    remove_attributes = []
    keep_only_tags = [dict(name='div', attrs={'id':'postarea'})]
    remove_tags = [
        dict(name='div', attrs={'class':['breadcrumb', 'submitted', 'links-readmore']}),
        dict(name='div', attrs={'id':['comments']}),
    ]
    remove_tags_before = dict(name='font', attrs={'size':'+3'})
    remove_tags_after = [dict(name='iframe')]

    conversion_options = {'linearize_tables' : True}
|
||||
|
||||
|
||||
|
||||
|
||||
|
34
recipes/piratske_noviny.recipe
Normal file
@ -0,0 +1,34 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import unicode_literals
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class nfpkRecipe(BasicNewsRecipe):
    """Calibre recipe for the news feed of piratskenoviny.cz.

    NOTE(review): the class name duplicates the one in the nfpk.cz recipe,
    apparently a copy-paste leftover; it is kept so existing references
    still resolve.
    """

    __author__ = 'bubak'
    title = u'Piratské noviny'
    publisher = u''
    # Was 'nfpk.cz', copied from the recipe this one was derived from.
    description = 'piratskenoviny.cz'
    oldest_article = 2
    max_articles_per_feed = 20
    use_embedded_content = False
    remove_empty_feeds = True

    feeds = [
        (u'Aktuality', u'http://www.piratskenoviny.cz/run/rss.php')
    ]

    #encoding = 'iso-8859-2'
    language = 'cs'
    cover_url = 'http://www.piratskenoviny.cz/imgs/piratske-noviny.gif'
    remove_javascript = True
    no_stylesheets = True
    extra_css = """
    """

    remove_attributes = []
    # Keep the span from the large-font heading to the first iframe.
    remove_tags_before = dict(name='font', attrs={'size':'+3'})
    remove_tags_after = [dict(name='iframe')]
    conversion_options = {'linearize_tables' : True}
|
||||
|
||||
|
||||
|
@ -4,7 +4,7 @@ class AdvancedUserRecipe1348063712(BasicNewsRecipe):
|
||||
title = u'Portfolio.hu - English Edition'
|
||||
__author__ = 'laca'
|
||||
oldest_article = 7
|
||||
language = 'en_HUN'
|
||||
language = 'en_HU'
|
||||
masthead_url = 'http://www.portfolio.hu/img/sit/angolfejlec2010.jpg'
|
||||
use_embedded_content = False
|
||||
auto_cleanup = True
|
||||
|
64
recipes/pravo.recipe
Normal file
@ -0,0 +1,64 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class pravo(BasicNewsRecipe):
    """Calibre recipe for pravo.novinky.cz that scrapes the section pages.

    The entries in ``feeds`` are HTML section pages, not RSS feeds;
    ``parse_index`` turns each of them into one feed.
    """

    __author__ = 'bubak'
    title = 'Právo'
    language = 'cs'

    remove_tags_before = dict(name='div', attrs={'class':'rubrika-ostat'})
    remove_tags_after = dict(name='td', attrs={'class':'rubrika'})
    remove_tags = [
        dict(name='td', attrs={'width':'273'}),
        dict(name='td', attrs={'class':'rubrika'}),
        dict(name='div', attrs={'class':'rubrika-ostat'}),
    ]
    extra_css = '.nadpis {font-weight: bold; font-size: 130%;} .medium {text-align: justify;}'
    cover_url = 'http://pravo.novinky.cz/images/horni_6_logo.gif'
    cover_margins = (0, 100, '#ffffff')
    conversion_options = {'linearize_tables' : True}

    no_stylesheets = True

    # our variables
    # Titles already emitted; a class attribute, so it is shared by all
    # section pages (and by every instance of this class).
    seen_titles = set([])
    # only yesterday's articles are online
    parent_url = 'http://pravo.novinky.cz/minule/'
    feeds = [
        ('Hlavní stránka', 'http://pravo.novinky.cz/minule/index.php'),
        ('Zpravodajství', 'http://pravo.novinky.cz/minule/zpravodajstvi.php'),
        ('Komentáře', 'http://pravo.novinky.cz/minule/komentare.php'),
        ('Praha a střední Čechy', 'http://pravo.novinky.cz/minule/praha_stredni_cechy.php')
    ]

    def parse_index(self):
        """Return one ``(feed_title, articles)`` tuple per entry in ``feeds``."""
        return [self.parse_page(feed) for feed in self.feeds]

    def parse_page(self, feed):
        """Scrape one section page into a ``(feed_title, articles)`` tuple.

        Args:
            feed: a ``(feed_title, url)`` pair as stored in ``feeds``.

        Returns:
            ``(feed_title, articles)`` where each article is a dict with
            'title', 'url', 'description' and 'date'. Titles already seen on
            an earlier page are skipped.

        Raises:
            ValueError: if the page contains no article links.
        """
        # Unpacked here rather than in the signature: tuple parameters were
        # removed from the language by PEP 3113.
        feed_title, url = feed

        soup = self.index_to_soup(url)
        titles = soup.findAll('a', attrs={'class':'nadpis'})
        # findAll returns an empty list (never None) when nothing matches.
        if not titles:
            raise ValueError('Could not find any articles on page ' + url)

        articles = []
        for link in titles:
            # tag_to_string also handles anchors with nested markup, where
            # .string would be None.
            title = self.tag_to_string(link)
            if title in self.seen_titles:
                continue
            self.seen_titles.add(title)
            article_url = link['href']
            if not article_url.startswith('http'):
                # Relative link: resolve against the archive directory.
                article_url = self.parent_url + article_url
            self.log('\tFound article:', title, 'at', article_url)
            articles.append({'title':title, 'url':article_url, 'description':'',
                             'date':''})
        return (feed_title, articles)
|
||||
|
40
recipes/prawica_net.recipe
Normal file
@ -0,0 +1,40 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'teepel <teepel44@gmail.com>'
|
||||
|
||||
'''
|
||||
http://prawica.net
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class prawica_recipe(BasicNewsRecipe):
    """News recipe for prawica.net; articles are fetched via their /print/ URLs."""

    # Publication metadata.
    title = u'prawica.net'
    __author__ = 'teepel <teepel44@gmail.com>'
    language = 'pl'
    description = 'Wiadomości ze strony prawica.net'
    INDEX = 'http://prawica.net/'

    # Download limits and generic clean-up switches.
    oldest_article = 1
    max_articles_per_feed = 100
    remove_empty_feeds = True
    remove_javascript = True
    no_stylesheets = True

    feeds = [(u'all', u'http://prawica.net/all/feed')]

    keep_only_tags = [
        # Intended to keep the article title; the original author noted
        # that this selector does not work.
        dict(name='h1', attrs={'class': 'print-title'}),
        dict(name='div', attrs={'class': 'content'}),
    ]

    remove_tags = [
        dict(name='div', attrs={'class': 'field field-type-viewfield field-field-autor2'}),
        dict(name='div', attrs={'class': 'field field-type-viewfield field-field-publikacje-autora'}),
        dict(name='div', attrs={'id': 'rate-widget-2 rate-widget clear-block rate-average rate-widget-fivestar rate-daa7512627f21dcf15e0af47e5279f0e rate-processed'}),
    ]

    remove_tags_after = [dict(name='div', attrs={'class': 'field-label-inline-first'})]

    def print_version(self, url):
        """Map an article URL to its counterpart under /print/."""
        site_root = 'http://prawica.net/'
        return url.replace(site_root, 'http://prawica.net/print/')
|
32
recipes/red_voltaire.recipe
Normal file
@ -0,0 +1,32 @@
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class RedVoltaireRecipe(BasicNewsRecipe):
    """News recipe for the Spanish-language feed of voltairenet.org."""

    # Publication metadata.
    title = u'Red Voltaire'
    __author__ = 'atordo'
    description = u'Red de prensa no alineada, especializada en el an\u00e1lisis de las relaciones internacionales'
    language = 'es'
    # The site logo doubles as cover and masthead.
    cover_url = masthead_url = u'http://www.voltairenet.org/squelettes/elements/images/logo-voltairenet-org.png'

    # Download limits and clean-up switches.
    oldest_article = 7
    max_articles_per_feed = 30
    use_embedded_content = False
    auto_cleanup = False
    no_stylesheets = True
    remove_javascript = True

    # Applied to the raw HTML, in order.
    preprocess_regexps = [
        # Replace everything between the <title> text and the first <time>
        # with a fresh <body> that starts with the title as an <h1>.
        (
            re.compile(r'<title>(?P<titulo>.+)</title>.+<span class="updated" title=".+"><time', re.IGNORECASE|re.DOTALL),
            lambda match: '</title></head><body><h1>'+match.group('titulo')+'</h1><time',
        ),
        # Keep only the publication date, up to the text-column marker.
        (
            re.compile(r'<time datetime=.+pubdate>. (?P<fecha>.+)</time>.+<!------------------- COLONNE TEXTE ------------------->', re.IGNORECASE|re.DOTALL),
            lambda match: '<small>'+match.group('fecha')+'</small>',
        ),
        # Cut the page at the first <aside> and close the document.
        (
            re.compile(r'<aside>.+', re.IGNORECASE|re.DOTALL),
            lambda match: '</body></html>',
        ),
    ]

    extra_css = '''
        img{margin-bottom:0.4em; display:block; margin-left:auto; margin-right:auto}
    '''

    feeds = [u'http://www.voltairenet.org/spip.php?page=backend&id_secteur=1110&lang=es']
|
||||
|
37
recipes/respekt.recipe
Normal file
@ -0,0 +1,37 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import unicode_literals
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
class respektRecipe(BasicNewsRecipe):
    """News recipe for the Czech weekly Respekt (respekt.ihned.cz)."""

    # Publication metadata.
    title = u'Respekt'
    publisher = u'Respekt'
    description = 'Respekt'
    __author__ = 'bubak'
    language = 'cs'
    encoding = 'cp1250'
    cover_url = 'http://respekt.ihned.cz/img/R/respekt_logo.png'

    # Download limits and clean-up switches.
    oldest_article = 1
    max_articles_per_feed = 20
    remove_javascript = True
    no_stylesheets = True

    feeds = [
        (u'Všechny články', u'http://respekt.ihned.cz/index.php?p=R00000_rss'),
        (u'Blogy', u'http://blog.respekt.ihned.cz/?p=Rb00VR_rss'),
        # Disabled feed:
        # (u'Respekt DJ', u'http://respekt.ihned.cz/index.php?p=R00RDJ_rss'),
    ]

    keep_only_tags = []
    remove_tags_before = dict(name='div', attrs={'id': ['detail']})
    remove_tags_after = dict(name='div', attrs={'class': 'd-tools'})
    remove_tags = [dict(name='div', attrs={'class': ['d-tools', 'actions']})]

    # Applied to the raw HTML, in order.
    preprocess_regexps = [
        # Replace the paid zone and everything after it with a notice.
        (
            re.compile(r'<div class="paid-zone".*', re.DOTALL|re.IGNORECASE),
            lambda match: 'Za zbytek článku je nutno platit. </body>',
        ),
        # Drop everything up to and including the "mm-ow" div.
        (
            re.compile(r'.*<div class="mm-ow">', re.DOTALL|re.IGNORECASE),
            lambda match: '<body>',
        ),
        # Drop everything from the "col3" div onwards.
        (
            re.compile(r'<div class="col3">.*', re.DOTALL|re.IGNORECASE),
            lambda match: '</body>',
        ),
    ]
|
||||
|
||||
|
||||
|
28
recipes/rushisaband.recipe
Normal file
@ -0,0 +1,28 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'MrStefan <mrstefaan@gmail.com>'
|
||||
|
||||
'''
|
||||
www.rushisaband.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class rushisaband(BasicNewsRecipe):
    """News recipe for the rushisaband.com blog feed."""

    # Publication metadata.
    title = u'Rushisaband'
    __author__ = 'MrStefan <mrstefaan@gmail.com>'
    language = 'en_GB'
    description = u'A blog devoted to the band RUSH and its members, Neil Peart, Geddy Lee and Alex Lifeson'

    # Download limits and clean-up switches.
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True
    remove_javascript = True
    no_stylesheets = True

    # Only headings and paragraphs are kept from each page.
    keep_only_tags = [dict(name=tag_name) for tag_name in ('h4', 'h5', 'p')]

    feeds = [(u'Rush is a Band', u'http://feeds2.feedburner.com/rushisaband/blog')]
|
29
recipes/rybinski.recipe
Normal file
@ -0,0 +1,29 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2012, Tomasz Dlugosz <tomek3d@gmail.com>'
|
||||
'''
|
||||
rybinski.eu
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Rybinski(BasicNewsRecipe):
    """News recipe for the rybinski.eu blog (Polish-language feed)."""

    # Publication metadata.
    title = u'Rybinski.eu - economy of the XXI century'
    __author__ = u'Tomasz D\u0142ugosz'
    description = u'Blog ekonomiczny dra hab. Krzysztofa Rybi\u0144skiego'
    language = 'pl'

    # Download limits and clean-up switches.
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True

    feeds = [(u'wpisy', u'http://www.rybinski.eu/?feed=rss2&lang=pl')]

    keep_only_tags = [dict(name='div', attrs={'class': 'post'})]

    # Post metadata and comment blocks are removed from the kept post div.
    remove_tags = [
        dict(name='div', attrs={'class': css_class})
        for css_class in ('post-meta-1', 'post-meta-2', 'post-comments')
    ]
|
||||
|
41
recipes/rynek_infrastruktury.recipe
Normal file
@ -0,0 +1,41 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'teepel <teepel44@gmail.com>'
|
||||
|
||||
'''
|
||||
http://www.rynekinfrastruktury.pl
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class prawica_recipe(BasicNewsRecipe):
    """News recipe for rynekinfrastruktury.pl; articles are fetched via their /artykul/drukuj/ URLs."""

    # Publication metadata.
    title = u'Rynek Infrastruktury'
    __author__ = 'teepel <teepel44@gmail.com>'
    language = 'pl'
    description = u'Portal "Rynek Infrastruktury" to źródło informacji o kluczowych elementach polskiej gospodarki: drogach, kolei, lotniskach, portach, telekomunikacji, energetyce, prawie i polityce, wzmocnione eksperckimi komentarzami kluczowych analityków.'

    # Download limits and clean-up switches.
    oldest_article = 1
    max_articles_per_feed = 100
    remove_empty_feeds = True
    remove_javascript = True
    no_stylesheets = True

    # One RSS channel per section of the portal.
    feeds = [
        (u'Drogi', u'http://www.rynekinfrastruktury.pl/rss/41'),
        (u'Lotniska', u'http://www.rynekinfrastruktury.pl/rss/42'),
        (u'Kolej', u'http://www.rynekinfrastruktury.pl/rss/37'),
        (u'Energetyka', u'http://www.rynekinfrastruktury.pl/rss/30'),
        (u'Telekomunikacja', u'http://www.rynekinfrastruktury.pl/rss/31'),
        (u'Porty', u'http://www.rynekinfrastruktury.pl/rss/32'),
        (u'Prawo i polityka', u'http://www.rynekinfrastruktury.pl/rss/47'),
        (u'Komentarze', u'http://www.rynekinfrastruktury.pl/rss/38'),
    ]

    keep_only_tags = [dict(name='div', attrs={'class': 'articleContent'})]
    remove_tags = [dict(name='span', attrs={'class': 'date'})]

    def print_version(self, url):
        """Map an article URL to its counterpart under /artykul/drukuj/."""
        article_root = 'http://www.rynekinfrastruktury.pl/artykul/'
        return url.replace(article_root, 'http://www.rynekinfrastruktury.pl/artykul/drukuj/')
|
40
recipes/rynek_kolejowy.recipe
Normal file
@ -0,0 +1,40 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'teepel <teepel44@gmail.com>'
|
||||
|
||||
'''
|
||||
rynek-kolejowy.pl
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class rynek_kolejowy(BasicNewsRecipe):
    """News recipe for rynek-kolejowy.pl; articles are fetched through drukuj.php."""

    # Publication metadata.
    title = u'Rynek Kolejowy'
    __author__ = 'teepel <teepel44@gmail.com>'
    language = 'pl'
    description = u'Rynek Kolejowy - kalendarium wydarzeń branży kolejowej, konferencje, sympozja, targi kolejowe, krajowe i zagraniczne.'
    masthead_url = 'http://p.wnp.pl/images/i/partners/rynek_kolejowy.gif'

    # Download limits and clean-up switches.
    oldest_article = 1
    max_articles_per_feed = 100
    remove_empty_feeds = True
    remove_javascript = True
    no_stylesheets = True

    keep_only_tags = [dict(name='div', attrs={'id': 'mainContent'})]

    remove_tags = [
        dict(name='div', attrs={'class': 'right no-print'}),
        dict(name='div', attrs={'id': 'font-size'}),
        dict(name='div', attrs={'class': 'no-print'}),
    ]

    extra_css = '''.wiadomosc_title{ font-size: 1.4em; font-weight: bold; }'''

    feeds = [(u'Wiadomości', u'http://www.rynek-kolejowy.pl/rss/rss.php')]

    def print_version(self, url):
        """Build the drukuj.php URL from an article URL.

        The first path component of ``url`` (index 3 after splitting on
        '/') is passed as the ``id`` query parameter.
        """
        return 'http://www.rynek-kolejowy.pl/drukuj.php?id=' + url.split('/')[3]
|
||||
|
26
recipes/samcik_blox.recipe
Normal file
@ -0,0 +1,26 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'teepel <teepel44@gmail.com>'
|
||||
|
||||
'''
|
||||
samcik.blox.pl
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class samcik(BasicNewsRecipe):
    """News recipe for the samcik.blox.pl blog feed."""

    # Publication metadata.
    title = u'Maciej Samcik Blog'
    __author__ = 'teepel <teepel44@gmail.com>'
    language = 'pl'
    description = u'Blog Macieja Samcika, długoletniego dziennikarza ekonomicznego Gazety Wyborczej . O finansach małych i dużych. Mnóstwo ciekawostek na temat pieniędzy.'

    # Download limits and clean-up switches.
    oldest_article = 7
    max_articles_per_feed = 100
    simultaneous_downloads = 3
    remove_javascript = True
    no_stylesheets = True

    # Borderless tables are stripped from the article pages.
    remove_tags = [dict(name='table', attrs={'border': '0'})]

    feeds = [(u'Wpisy', u'http://samcik.blox.pl/rss2')]
|
47
recipes/satkurier.recipe
Normal file
@ -0,0 +1,47 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class SATKurier(BasicNewsRecipe):
    """News recipe for SATKurier.pl (news, TV sport and blog feeds)."""

    # Publication metadata.
    title = u'SATKurier.pl'
    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
    # Adjacent literals are used instead of backslash continuations inside
    # one literal: a continuation puts the next line's leading whitespace
    # (or no separator at all) into the description text.
    description = (u'Największy i najstarszy serwis poświęcony '
                   u'telewizji cyfrowej, przygotowywany przez wydawcę '
                   u'miesięcznika SAT Kurier. Bieżące wydarzenia '
                   u'z rynku mediów i nowych technologii.')
    masthead_url = 'http://satkurier.pl/img/header_sk_logo.gif'

    # Download limits and clean-up switches.
    oldest_article = 7
    max_articles_per_feed = 100
    simultaneous_downloads = 5
    remove_javascript = True
    no_stylesheets = True

    keep_only_tags = [dict(name='div', attrs={'id': ['single_news', 'content']})]

    remove_tags = [
        dict(attrs={'id': ['news_info', 'comments']}),
        dict(attrs={'href': '#czytaj'}),
        dict(attrs={'align': 'center'}),
        dict(attrs={'class': ['date', 'category', 'right mini-add-comment', 'socialLinks', 'commentlist']}),
    ]

    remove_tags_after = [dict(id='entry')]

    feeds = [
        (u'Najnowsze wiadomości', u'http://feeds.feedburner.com/satkurierpl?format=xml'),
        (u'Sport w telewizji', u'http://feeds.feedburner.com/satkurier/sport?format=xml'),
        (u'Blog', u'http://feeds.feedburner.com/satkurier/blog?format=xml'),
    ]

    def preprocess_html(self, soup):
        """Move the article thumbnail below the headline and unwrap text links.

        The element with id ``news_mini_photo`` is re-inserted right after
        the first ``<h1>``. Every ``<a>`` with a single text child is
        replaced by that text. Returns the modified soup.
        """
        image = soup.find(attrs={'id': 'news_mini_photo'})
        header = soup.find('h1')
        # Relocate the image only when both elements exist. Without this
        # check a page that has the photo but no <h1> would raise on
        # header.prettify() after the image had already been detached.
        if image is not None and header is not None:
            image.extract()
            header.replaceWith(header.prettify() + image.prettify())
        for alink in soup.findAll('a'):
            if alink.string is not None:
                alink.replaceWith(alink.string)
        return soup
|
@ -17,6 +17,7 @@ class Sciencenews(BasicNewsRecipe):
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
auto_cleanup = True
|
||||
timefmt = ' [%A, %d %B, %Y]'
|
||||
|
||||
extra_css = '''
|
||||
@ -31,14 +32,14 @@ class Sciencenews(BasicNewsRecipe):
|
||||
.credit{color:#A6A6A6;font-family:helvetica,arial ;font-size: xx-small ;}
|
||||
'''
|
||||
|
||||
keep_only_tags = [ dict(name='div', attrs={'id':'column_action'}) ]
|
||||
remove_tags_after = dict(name='ul', attrs={'id':'content_functions_bottom'})
|
||||
remove_tags = [
|
||||
dict(name='ul', attrs={'id':'content_functions_bottom'})
|
||||
,dict(name='div', attrs={'id':['content_functions_top','breadcrumb_content']})
|
||||
,dict(name='img', attrs={'class':'icon'})
|
||||
,dict(name='div', attrs={'class': 'embiggen'})
|
||||
]
|
||||
#keep_only_tags = [ dict(name='div', attrs={'id':'column_action'}) ]
|
||||
#remove_tags_after = dict(name='ul', attrs={'id':'content_functions_bottom'})
|
||||
#remove_tags = [
|
||||
#dict(name='ul', attrs={'id':'content_functions_bottom'})
|
||||
#,dict(name='div', attrs={'id':['content_functions_top','breadcrumb_content']})
|
||||
#,dict(name='img', attrs={'class':'icon'})
|
||||
#,dict(name='div', attrs={'class': 'embiggen'})
|
||||
#]
|
||||
|
||||
feeds = [(u"Science News / News Items", u'http://sciencenews.org/index.php/feed/type/news/name/news.rss/view/feed/name/all.rss')]
|
||||
|
||||
@ -53,9 +54,9 @@ class Sciencenews(BasicNewsRecipe):
|
||||
|
||||
return cover_url
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
#def preprocess_html(self, soup):
|
||||
|
||||
for tag in soup.findAll(name=['span']):
|
||||
tag.name = 'div'
|
||||
#for tag in soup.findAll(name=['span']):
|
||||
#tag.name = 'div'
|
||||
|
||||
return soup
|
||||
#return soup
|
||||
|
@ -16,7 +16,7 @@ class AdvancedUserRecipe1303841067(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
remove_empty_feeds = True
|
||||
language = 'de_DE'
|
||||
language = 'de'
|
||||
|
||||
#conversion_options = {'base_font_size': 20}
|
||||
|
||||
|
67
recipes/telepolis_pl.recipe
Normal file
@ -0,0 +1,67 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
|
||||
class telepolis(BasicNewsRecipe):
    """News recipe for Telepolis.pl; articles are fetched via their *_print.php pages."""

    # Publication metadata.
    title = u'Telepolis.pl'
    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
    # Adjacent literals are used instead of backslash continuations inside
    # one literal: a continuation puts the next line's leading whitespace
    # (or no separator at all) into the description text.
    description = (u'Twój telekomunikacyjny serwis informacyjny. '
                   u'Codzienne informacje, testy i artykuły, '
                   u'promocje, baza telefonów oraz centrum rozrywki')
    masthead_url = 'http://telepolis.pl/i/telepolis-logo2.gif'

    # Download limits and clean-up switches.
    oldest_article = 7
    max_articles_per_feed = 100
    simultaneous_downloads = 5
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False

    remove_tags = [dict(attrs={'alt': 'TELEPOLIS.pl'})]

    # Applied to the raw HTML, in order; every match is deleted.
    preprocess_regexps = [
        (re.compile(r'<: .*? :>'), lambda match: ''),
        (re.compile(r'<b>Zobacz:</b>.*?</a>', re.DOTALL), lambda match: ''),
        (re.compile(r'<-ankieta.*?>'), lambda match: ''),
        (re.compile(r'\(Q\!\)'), lambda match: ''),
        (re.compile(r'\(plik.*?\)'), lambda match: ''),
        (re.compile(r'<br.*?><br.*?>', re.DOTALL), lambda match: ''),
    ]

    extra_css = '''.tb { font-weight: bold; font-size: 20px;}'''

    feeds = [
        (u'Wiadomości', u'http://www.telepolis.pl/rss/news.php'),
        (u'Artykuły', u'http://www.telepolis.pl/rss/artykuly.php'),
    ]

    def print_version(self, url):
        """Rewrite a news or article URL to the matching *_print.php URL."""
        if 'news.php' in url:
            return url.replace('news.php', 'news_print.php')
        return url.replace('artykuly.php', 'art_print.php')

    def preprocess_html(self, soup):
        """Use full-size images and drop the logo and footer table rows.

        Returns the result of ``self.adeify_images`` on the cleaned soup.
        """
        for image in soup.findAll('img'):
            # Images without a src attribute are skipped instead of
            # raising KeyError.
            src = image.get('src')
            if src and 'm.jpg' in src:
                # The full-size file has the same name without the 'm'
                # suffix.
                image['src'] = src.replace('m.jpg', '.jpg')

        # The first table row of the page is removed; the original code
        # names it "logo". Guarded because find() returns None on a page
        # without any table row.
        logo = soup.find('tr')
        if logo is not None:
            logo.extract()

        # Unicode literals, so the substring test against the unicode tag
        # text does not depend on the interpreter's default encoding.
        footer_markers = (u'Wiadomość wydrukowana', u'copyright')
        for tag in soup.findAll('tr'):
            text = self.tag_to_string(tag)
            if any(marker in text for marker in footer_markers):
                tag.extract()
        return self.adeify_images(soup)
|
@ -14,26 +14,21 @@ class tvn24(BasicNewsRecipe):
|
||||
remove_empty_feeds = True
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
keep_only_tags=[
|
||||
# dict(name='h1', attrs={'class':'size38 mt20 pb20'}),
|
||||
dict(name='div', attrs={'class':'mainContainer'}),
|
||||
# dict(name='p'),
|
||||
# dict(attrs={'class':['size18 mt10 mb15', 'bold topicSize1', 'fromUsers content', 'textArticleDefault']})
|
||||
keep_only_tags=[
|
||||
# dict(name='h1', attrs={'class':'size38 mt20 pb20'}),
|
||||
dict(name='div', attrs={'class':'mainContainer'}),
|
||||
# dict(name='p'),
|
||||
# dict(attrs={'class':['size18 mt10 mb15', 'bold topicSize1', 'fromUsers content', 'textArticleDefault']})
|
||||
]
|
||||
remove_tags=[
|
||||
dict(attrs={'class':['commentsInfo', 'textSize', 'related newsNews align-right', 'box', 'watchMaterial text', 'related galleryGallery align-center', 'advert block-alignment-right', 'userActions', 'socialBookmarks', 'im yourArticle fl', 'dynamicButton addComment fl', 'innerArticleModule onRight cols externalContent', 'thumbsGallery', 'relatedObject customBlockquote align-right', 'lead', 'mainRightColumn', 'articleDateContainer borderGreyBottom', 'socialMediaContainer onRight loaded', 'quizContent', 'twitter', 'facebook', 'googlePlus', 'share', 'voteResult', 'reportTitleBar bgBlue_v4 mb15', 'innerVideoModule center']}),
|
||||
dict(name='article', attrs={'class':['singleArtPhotoCenter', 'singleArtPhotoRight', 'singleArtPhotoLeft']}),
|
||||
dict(name='section', attrs={'id':['forum', 'innerArticle', 'quiz toCenter', 'mb20']}),
|
||||
dict(name='div', attrs={'class':'socialMediaContainer big p20 mb20 borderGrey loaded'})
|
||||
]
|
||||
dict(attrs={'class':['commentsInfo', 'textSize', 'related newsNews align-right', 'box', 'watchMaterial text', 'related galleryGallery align-center', 'advert block-alignment-right', 'userActions', 'socialBookmarks', 'im yourArticle fl', 'dynamicButton addComment fl', 'innerArticleModule onRight cols externalContent', 'thumbsGallery', 'relatedObject customBlockquote align-right', 'lead', 'mainRightColumn', 'articleDateContainer borderGreyBottom', 'socialMediaContainer onRight loaded', 'quizContent', 'twitter', 'facebook', 'googlePlus', 'share', 'voteResult', 'reportTitleBar bgBlue_v4 mb15', 'innerVideoModule center']}),
|
||||
dict(name='article', attrs={'class':['singleArtPhotoCenter', 'singleArtPhotoRight', 'singleArtPhotoLeft']}),
|
||||
dict(name='section', attrs={'id':['forum', 'innerArticle', 'quiz toCenter', 'mb20']}),
|
||||
dict(name='div', attrs={'class':'socialMediaContainer big p20 mb20 borderGrey loaded'})
|
||||
]
|
||||
remove_tags_after=[dict(name='li', attrs={'class':'share'})]
|
||||
feeds = [(u'Najnowsze', u'http://www.tvn24.pl/najnowsze.xml'), ]
|
||||
#(u'Polska', u'www.tvn24.pl/polska.xml'), (u'\u015awiat', u'http://www.tvn24.pl/swiat.xml'), (u'Sport', u'http://www.tvn24.pl/sport.xml'), (u'Biznes', u'http://www.tvn24.pl/biznes.xml'), (u'Meteo', u'http://www.tvn24.pl/meteo.xml'), (u'Micha\u0142ki', u'http://www.tvn24.pl/michalki.xml'), (u'Kultura', u'http://www.tvn24.pl/kultura.xml')]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
#(u'Polska', u'www.tvn24.pl/polska.xml'), (u'\u015awiat', u'http://www.tvn24.pl/swiat.xml'), (u'Sport', u'http://www.tvn24.pl/sport.xml'), (u'Biznes', u'http://www.tvn24.pl/biznes.xml'), (u'Meteo', u'http://www.tvn24.pl/meteo.xml'), (u'Micha\u0142ki', u'http://www.tvn24.pl/michalki.xml'), (u'Kultura', u'http://www.tvn24.pl/kultura.xml')]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for alink in soup.findAll('a'):
|
||||
|
44
recipes/tyden.cz.recipe
Normal file
@ -0,0 +1,44 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import unicode_literals
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class tydenRecipe(BasicNewsRecipe):
    """News recipe for tyden.cz that skips articles repeated across feeds."""

    # Publication metadata.
    title = u'Tyden.cz'
    publisher = u''
    description = ''
    __author__ = 'bubak'
    language = 'cs'
    # encoding = 'iso-8859-2'
    cover_url = 'http://www.tyden.cz/img/tyden-logo.png'

    # Download limits and clean-up switches.
    oldest_article = 1
    max_articles_per_feed = 20
    remove_javascript = True
    no_stylesheets = True
    remove_attributes = []

    feeds = [
        (u'Domácí', u'http://www.tyden.cz/rss/rss.php?rubrika_id=6'),
        (u'Politika', u'http://www.tyden.cz/rss/rss.php?rubrika_id=173'),
        (u'Kauzy', u'http://www.tyden.cz/rss/rss.php?rubrika_id=340'),
    ]

    remove_tags_before = dict(name='p', attrs={'id': ['breadcrumbs']})
    remove_tags_after = dict(name='p', attrs={'class': ['author']})

    # URLs already accepted by get_article_url; only the keys matter.
    visited_urls = {}

    def get_article_url(self, article):
        """Return the article URL, or None if that URL was already accepted."""
        link = BasicNewsRecipe.get_article_url(self, article)
        if link in self.visited_urls:
            self.log.debug('Ignoring duplicate: ' + link)
            return None
        self.visited_urls[link] = True
        self.log.debug('Accepting: ' + link)
        return link
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -4,7 +4,7 @@ class AdvancedUserRecipe1347997197(BasicNewsRecipe):
|
||||
title = u'XpatLoop.com'
|
||||
__author__ = 'laca'
|
||||
oldest_article = 7
|
||||
language = 'en_HUN'
|
||||
language = 'en_HU'
|
||||
auto_cleanup = True
|
||||
masthead_url = 'http://www.xpatloop.com/images/cms/xs_logo.gif'
|
||||
use_embedded_content = False
|
||||
|
@ -16,7 +16,7 @@ class ZeitDe(BasicNewsRecipe):
|
||||
category = 'news, Germany'
|
||||
timefmt = ' [%a, %d %b %Y]'
|
||||
publication_type = 'newspaper'
|
||||
language = 'de_DE'
|
||||
language = 'de'
|
||||
encoding = 'UTF-8'
|
||||
|
||||
oldest_article = 7
|
||||
|
BIN
resources/images/mimetypes/cbr.png
Normal file
After Width: | Height: | Size: 10 KiB |
BIN
resources/images/mimetypes/cbz.png
Normal file
After Width: | Height: | Size: 10 KiB |
@ -11,6 +11,7 @@ let g:syntastic_cpp_include_dirs = [
|
||||
\'/usr/include/freetype2',
|
||||
\'/usr/include/fontconfig',
|
||||
\'src/qtcurve/common', 'src/qtcurve',
|
||||
\'src/unrar',
|
||||
\'/usr/include/ImageMagick',
|
||||
\]
|
||||
let g:syntastic_c_include_dirs = g:syntastic_cpp_include_dirs
|
||||
|
@ -6,7 +6,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, socket, struct, subprocess
|
||||
import os, socket, struct, subprocess, sys, glob
|
||||
from distutils.spawn import find_executable
|
||||
|
||||
from PyQt4 import pyqtconfig
|
||||
@ -16,6 +16,7 @@ from setup import isosx, iswindows, islinux
|
||||
OSX_SDK = '/Developer/SDKs/MacOSX10.5.sdk'
|
||||
|
||||
os.environ['MACOSX_DEPLOYMENT_TARGET'] = '10.5'
|
||||
is64bit = sys.maxsize > 2**32
|
||||
|
||||
NMAKE = RC = msvc = MT = win_inc = win_lib = win_ddk = win_ddk_lib_dirs = None
|
||||
if iswindows:
|
||||
@ -35,7 +36,7 @@ if iswindows:
|
||||
MT = os.path.join(os.path.dirname(p), 'bin', 'mt.exe')
|
||||
MT = os.path.join(SDK, 'bin', 'mt.exe')
|
||||
os.environ['QMAKESPEC'] = 'win32-msvc'
|
||||
ICU = r'Q:\icu'
|
||||
ICU = os.environ.get('ICU_DIR', r'Q:\icu')
|
||||
|
||||
QMAKE = '/Volumes/sw/qt/bin/qmake' if isosx else 'qmake'
|
||||
if find_executable('qmake-qt4'):
|
||||
@ -121,7 +122,8 @@ if iswindows:
|
||||
zlib_lib_dirs = [sw_lib_dir]
|
||||
zlib_libs = ['zlib']
|
||||
|
||||
magick_inc_dirs = [os.path.join(prefix, 'build', 'ImageMagick-6.7.6')]
|
||||
md = glob.glob(os.path.join(prefix, 'build', 'ImageMagick-*'))[-1]
|
||||
magick_inc_dirs = [md]
|
||||
magick_lib_dirs = [os.path.join(magick_inc_dirs[0], 'VisualMagick', 'lib')]
|
||||
magick_libs = ['CORE_RL_wand_', 'CORE_RL_magick_']
|
||||
podofo_inc = os.path.join(sw_inc_dir, 'podofo')
|
||||
|
@ -18,7 +18,7 @@ from setup.build_environment import (chmlib_inc_dirs,
|
||||
msvc, MT, win_inc, win_lib, win_ddk, magick_inc_dirs, magick_lib_dirs,
|
||||
magick_libs, chmlib_lib_dirs, sqlite_inc_dirs, icu_inc_dirs,
|
||||
icu_lib_dirs, win_ddk_lib_dirs, ft_libs, ft_lib_dirs, ft_inc_dirs,
|
||||
zlib_libs, zlib_lib_dirs, zlib_inc_dirs)
|
||||
zlib_libs, zlib_lib_dirs, zlib_inc_dirs, is64bit)
|
||||
MT
|
||||
isunix = islinux or isosx or isbsd
|
||||
|
||||
@ -47,6 +47,13 @@ class Extension(object):
|
||||
self.ldflags = kwargs.get('ldflags', [])
|
||||
self.optional = kwargs.get('optional', False)
|
||||
self.needs_ddk = kwargs.get('needs_ddk', False)
|
||||
of = kwargs.get('optimize_level', None)
|
||||
if of is None:
|
||||
of = '/Ox' if iswindows else '-O3'
|
||||
else:
|
||||
flag = '/O%d' if iswindows else '-O%d'
|
||||
of = flag % of
|
||||
self.cflags.insert(0, of)
|
||||
|
||||
def preflight(self, obj_dir, compiler, linker, builder, cflags, ldflags):
|
||||
pass
|
||||
@ -176,6 +183,24 @@ extensions = [
|
||||
sip_files = ['calibre/gui2/progress_indicator/QProgressIndicator.sip']
|
||||
),
|
||||
|
||||
Extension('unrar',
|
||||
['unrar/%s.cpp'%(x.partition('.')[0]) for x in '''
|
||||
rar.o strlist.o strfn.o pathfn.o savepos.o smallfn.o global.o file.o
|
||||
filefn.o filcreat.o archive.o arcread.o unicode.o system.o
|
||||
isnt.o crypt.o crc.o rawread.o encname.o resource.o match.o
|
||||
timefn.o rdwrfn.o consio.o options.o ulinks.o errhnd.o rarvm.o
|
||||
secpassword.o rijndael.o getbits.o sha1.o extinfo.o extract.o
|
||||
volume.o list.o find.o unpack.o cmddata.o filestr.o scantree.o
|
||||
'''.split()] + ['calibre/utils/unrar.cpp'],
|
||||
inc_dirs=['unrar'],
|
||||
cflags = [('/' if iswindows else '-') + x for x in (
|
||||
'DSILENT', 'DRARDLL', 'DUNRAR')] + (
|
||||
[] if iswindows else ['-D_FILE_OFFSET_BITS=64',
|
||||
'-D_LARGEFILE_SOURCE']),
|
||||
optimize_level=2,
|
||||
libraries=['User32', 'Advapi32', 'kernel32', 'Shell32'] if iswindows else []
|
||||
),
|
||||
|
||||
]
|
||||
|
||||
|
||||
@ -239,7 +264,7 @@ if isunix:
|
||||
cxx = os.environ.get('CXX', 'g++')
|
||||
cflags = os.environ.get('OVERRIDE_CFLAGS',
|
||||
# '-Wall -DNDEBUG -ggdb -fno-strict-aliasing -pipe')
|
||||
'-O3 -Wall -DNDEBUG -fno-strict-aliasing -pipe')
|
||||
'-Wall -DNDEBUG -fno-strict-aliasing -pipe')
|
||||
cflags = shlex.split(cflags) + ['-fPIC']
|
||||
ldflags = os.environ.get('OVERRIDE_LDFLAGS', '-Wall')
|
||||
ldflags = shlex.split(ldflags)
|
||||
@ -274,10 +299,12 @@ if isosx:
|
||||
|
||||
if iswindows:
|
||||
cc = cxx = msvc.cc
|
||||
cflags = '/c /nologo /Ox /MD /W3 /EHsc /DNDEBUG'.split()
|
||||
cflags = '/c /nologo /MD /W3 /EHsc /DNDEBUG'.split()
|
||||
ldflags = '/DLL /nologo /INCREMENTAL:NO /NODEFAULTLIB:libcmt.lib'.split()
|
||||
#cflags = '/c /nologo /Ox /MD /W3 /EHsc /Zi'.split()
|
||||
#ldflags = '/DLL /nologo /INCREMENTAL:NO /DEBUG'.split()
|
||||
if is64bit:
|
||||
cflags.append('/GS-')
|
||||
|
||||
for p in win_inc:
|
||||
cflags.append('-I'+p)
|
||||
|