This commit is contained in:
Sengian 2011-06-05 16:58:15 +02:00
commit bbdb113a2c
321 changed files with 166025 additions and 80057 deletions

View File

@ -4,6 +4,7 @@ src/calibre/plugins
resources/images.qrc
src/calibre/manual/.build/
src/calibre/manual/cli/
src/calibre/manual/template_ref.rst
build
dist
docs
@ -31,3 +32,4 @@ nbproject/
.pydevproject
.settings/
*.DS_Store
calibre_plugins/

View File

@ -478,7 +478,7 @@
type: major
description : >
"You can now save your frequently used searches and access them with a single click. For details
see http://calibre-ebook.com/user_manual/gui.html#search-sort"
see http://manual.calibre-ebook.com/gui.html#search-sort"
- title: "Add searching by date/published date"
tickets: [5244]

View File

@ -19,6 +19,202 @@
# new recipes:
# - title:
- version: 0.8.4
date: 2011-06-03
new features:
- title: "New and much simpler interface for specifying column coloring via Preferences->Look & Feel->Column Coloring"
- title: "Driver for Trekstor eBook Player 5M, Samsung Galaxy SII I9100, Motorola Defy and miBuk GAMMA 6.2"
tickets: [792091, 791216]
- title: "Get Books: Add EpubBud, WH Smits and E-book Shoppe stores"
- title: "When deleting 'all formats except ...', do not delete if it leaves a book with no formats"
- title: "Change default toolbar to make it a little more new user friendly. The icons have been re-arranged and now the text is always visiblke by default. You can change that in Preferences->Look & Feeel and Preferences->Toolbar"
- title: "Windows installer: Remember and use previous settings for installing desktop icons, adding to path, etc. This makes the installer a little slower, complaints should go to Microsoft."
- title: "Template language: Add str_in_list and on_device formatter functions. Make debugging templates a little easier"
- title: "Allow the user to specify formatting for number type custom columns"
bug fixes:
- title: "Fix typo in NOOK TSR driver that prevented it from working on windows"
- title: "Fix quotes in identifiers causing Tag Browser to be blank."
tickets: [791044]
- title: "Speedup auto complete when there are lots of items (>2500) the downside being that non ASCII characters are not sorted correctly. The threshold can be controlled via Preferences->Tweaks"
tickets: [792191]
- title: "RTF Output: Fix handling of curly brackets"
tickets: [791805]
- title: "Fix searching in Get Books not working with non ASCII characters"
tickets: [791788]
- title: "Fix excessive memory consumption when moving very large files during a metadata change"
tickets: [791806]
- title: "Fix series index being overwritten even when series is turned off in bulk metadata download"
tickets: [789990]
- title: "Fix regression in templates where id and other non standard fields no longer worked."
- title: "EPUB Output: Fix crash caused by ids with non-ascii characters in them"
- title: "Try to preserve the timestamps of files in a ZIP container"
- title: "After adding books always select the most recently added book."
tickets: [789343]
improved recipes:
- bild.de
- CNN
- BBC News (fast)
- Dilema Veche
new recipes:
- title: Metro UK
author: Dave Asbury
- title: Alt om Herning and Version2.dk
author: Rasmus Lauritsen
- title: Observatorul cultural
author: song2
- version: 0.8.3
date: 2011-05-27
new features:
- title: "Allow the coloring of columns in the book list."
description: "You can either create a custom column with a fixed set of values and assign a color to each value, or you can use the calibre template language to color any column in arbitrarily powerful ways. For example, you can have the title appear in red if the book has a particular tag."
type: major
- title: "Support for the Nook Simple Reader"
- title: "Get Books, new stores: Virtualo, lulu.net"
- title: "A store chooser dialog for Get Books (click the little preferences icon at the bottom of the Get Books screen)."
- title: "Add a merge_lists, and, or, not template functions to the calibre template language"
- title: "EPUB Output: Change any white-space:pre declarations in the CSS to pre-wrap to accomodate readers that cannot scroll horizontally."
tickets: [786722]
- title: "Windows installer: Remember and use previous installation folder when upgrading. Note that this will work for future upgrades, after this one."
bug fixes:
- title: "MOBI Output: Fix hidden tags with id attributes also hiding their trailing text"
tickets: [788570]
- title: "Fix switching from one news source to another via a search not saving changes to the scheduling of the first source"
tickets: [774849]
- title: "Dont allow user to use non email usernames when setting up Hotmail or Gmail accounts"
- title: "Amazon metadata download: Use separate identifiers for country specific downloads so that the links to Amazon in the Book details panel work when downloading metadata from country specific amazon websites."
tickets: [786146]
- title: "Nicer error message when user attempts to set title/author via Edit metadata dialog and one of the files is open in another program."
- title: "Fix {id} not working in send to device templates"
- title: "Windows: If creating a bytestring temp dir fails, create a unicode one and hope the rest of calibre can handle it"
- title: "Get Books: Fix some results from Amazon missing."
tickets: [785962]
improved recipes:
- Kathermini
- Faz.net
- The Washington Post
- El Mundo
- Marca
- The Nation
new recipes:
- title: Various German news sources
author: schuster
- title: "George R. R. Martin's Blog"
author: Darko Miletic
- title: "Focus (DE) and National Geographic"
auhtor: Anonymous
- version: 0.8.2
date: 2011-05-20
new features:
- title: "Various new ebook sources added to Get Books: Google Books, O'Reilly, archive.org, some Polish ebooks stores, etc."
- title: "Amazon metadata download: Allow user to configure Amazon plugin to use any of the US, UK, German, French and Italian Amazon websites"
- title: "When deleting large numbers of books, give the user the option to skip the Recycle Bin, since sending lots of files to the recycle bin can be very slow."
tickets: [784987]
- title: "OS X: The unified title+toolbar was disabled as it had various bugs. If you really want it you can turn it on again via Preferences->Tweaks, but be aware that you will see problems like the calibre windowd being too wide, weird animations when a device is detected, etc."
- title: "Add a tweak that controls what words are treated as suffixes when generating an author sort string from an author name."
- title: "Get Books: Store last few searches in history"
bug fixes:
- title: "Fix a crash when a device is connected/disconnected while a modal dialog opened from the toolbar is visible"
tickets: [780484]
- title: "Fix incorrect results from ebooks.com when searching via Get Books"
- title: "Metadata plugboards: Add prioritization scheme to allow for using different settings for different locations"
tickets: [783229]
- title: "Fix manage authors dialog too wide"
tickets: [783065]
- title: "Fix multiple bracket types in author names not handled correctly when generating author sort string"
tickets: [782551]
- title: "MOBI Input: Don't error out when detecting TOC structure if one of the elements has an invalid margin unit"
- title: "More fixes for japanese language calibre on windows"
tickets: [782408]
- title: "Linux binaries: Always use either Cleanlook or Plastique styles for the GUI if no style can be loaded from the host computer"
improved recipes:
- Newsweek
- Economist
- Dvhn
- United Daily
- Dagens Nyheter
- GoComics
- faz.net
- golem.de
new recipes:
- title: National Geographic
author: gagsays
- title: Various German news sources
author: schuster
- title: Dilema Veche
author: Silviu Cotoara
- title: "Glamour, Good to Know, Good Housekeeping and Men's Health"
author: Anonymous
- title: "Financial Sense and iProfessional"
author: Darko Miletic
- version: 0.8.1
date: 2011-05-13
@ -456,7 +652,7 @@
- title: "FB2 Output: Option to set the FB2 genre explicitly."
tickets: [743178]
- title: "Plugin developers: calibre now has a new plugin API, see http://calibre-ebook.com/user_manual/creating_plugins.html. Your existing plugins should continue to work, but it would be good to test them to make sure."
- title: "Plugin developers: calibre now has a new plugin API, see http://manual.calibre-ebook.com/creating_plugins.html. Your existing plugins should continue to work, but it would be good to test them to make sure."
bug fixes:
- title: "Fix text color in the search bar set to black instead of the system font color"
@ -841,7 +1037,7 @@
new features:
- title: "Tag Browser: Support the creation of nested User Categories"
description: "See http://calibre-ebook.com/user_manual/gui.html#tag-browser for details"
description: "See http://manual.calibre-ebook.com/gui.html#tag-browser for details"
type: major
- title: "Disable Kent District Library plugin to download series information. The website could not handle the load calibre's 2 million users put on it. You can manually re-enable it if you really want series information, but it is very slow"
@ -3714,7 +3910,7 @@
type: major
description: >
"You can now change the icons used in the User Interface and other static resources. Details on how to
do this are at: http://calibre-ebook.com/user_manual/customize.html#overriding-icons-templates-etcetera"
do this are at: http://manual.calibre-ebook.com/customize.html#overriding-icons-templates-etcetera"
- title: "Split the 'Send to device' button into two buttons, 'Connect/share' and 'Send to device'. The new 'Send to device' button will now only be available when a device is connected."

View File

@ -3,7 +3,7 @@ calibre supports installation from source, only on Linux.
Note that you *do not* need to install from source to hack on
the calibre source code. To get started with calibre development,
use a normal calibre install and follow the instructions at
http://calibre-ebook.com/user_manual/develop.html
http://manual.calibre-ebook.com/develop.html
On Linux, there are two kinds of installation from source possible.
Note that both kinds require lots of dependencies as well as a

4
README
View File

@ -6,8 +6,8 @@ reading. It is cross platform, running on Linux, Windows and OS X.
For screenshots: https://calibre-ebook.com/demo
For installation/usage instructions please see
http://calibre-ebook.com/user_manual
For usage instructions please see
http://manual.calibre-ebook.com
For source code access:
bzr branch lp:calibre

BIN
icons/favicon.ico Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 158 KiB

View File

@ -0,0 +1,42 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class AdvancedUserRecipe(BasicNewsRecipe):
title = u'Aachener Nachrichten'
__author__ = 'schuster'
oldest_article = 1
max_articles_per_feed = 100
use_embedded_content = False
language = 'de'
remove_javascript = True
cover_url = 'http://www.an-online.de/einwaage/images/an_logo.png'
masthead_url = 'http://www.an-online.de/einwaage/images/an_logo.png'
extra_css = '''
.fliesstext_detail:{margin-bottom:10%;}
.headline_1:{margin-bottom:25%;}
b{font-family:Arial,Helvetica,sans-serif; font-weight:200;font-size:large;}
a{font-family:Arial,Helvetica,sans-serif; font-weight:400;font-size:large;}
ll{font-family:Arial,Helvetica,sans-serif; font-weight:100;font-size:large;}
h4{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
img {min-width:300px; max-width:600px; min-height:300px; max-height:800px}
dd{font-family:Arial,Helvetica,sans-serif;font-size:large;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
keep_only_tags = [
dict(name='span', attrs={'class':['fliesstext_detail', 'headline_1', 'autor_detail']}),
dict(id=['header-logo'])
]
feeds = [(u'Euregio', u'http://www.an-online.de/an/rss/Euregio.xml'),
(u'Aachen', u'http://www.an-online.de/an/rss/Aachen.xml'),
(u'Nordkreis', u'http://www.an-online.de/an/rss/Nordkreis.xml'),
(u'Düren', u'http://www.an-online.de/an/rss/Dueren.xml'),
(u'Eiffel', u'http://www.an-online.de/an/rss/Eifel.xml'),
(u'Eschweiler', u'http://www.an-online.de/an/rss/Eschweiler.xml'),
(u'Geilenkirchen', u'http://www.an-online.de/an/rss/Geilenkirchen.xml'),
(u'Heinsberg', u'http://www.an-online.de/an/rss/Heinsberg.xml'),
(u'Jülich', u'http://www.an-online.de/an/rss/Juelich.xml'),
(u'Stolberg', u'http://www.an-online.de/an/rss/Stolberg.xml'),
(u'Ratgebenr', u'http://www.an-online.de/an/rss/Ratgeber.xml')]

View File

@ -0,0 +1,43 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Rasmus Lauritsen <rasmus at lauritsen.info>'
'''
aoh.dk
'''
from calibre.web.feeds.news import BasicNewsRecipe
class aoh_dk(BasicNewsRecipe):
title = 'Alt om Herning'
__author__ = 'Rasmus Lauritsen'
description = 'Nyheder fra Herning om omegn'
publisher = 'Mediehuset Herning Folkeblad'
category = 'news, local, Denmark'
oldest_article = 14
max_articles_per_feed = 50
no_stylesheets = True
delay = 1
encoding = 'utf8'
use_embedded_content = False
language = 'da'
extra_css = """ body{font-family: Verdana,Arial,sans-serif }
img{margin-bottom: 0.4em}
.txtContent,.stamp{font-size: small}
"""
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
feeds = [(u'All news', u'http://aoh.dk/rss.xml')]
keep_only_tags = [
dict(name='h1')
,dict(name='span', attrs={'class':['frontpage_body']})
]
remove_tags = [
dict(name=['object','link'])
]

View File

@ -1,27 +1,30 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2010 - 2011, Darko Miletic <darko.miletic at gmail.com>'
'''
news.bbc.co.uk
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class BBC(BasicNewsRecipe):
title = 'BBC News (fast)'
__author__ = 'Darko Miletic, Starson17'
description = 'News from UK. A much faster version that does not download pictures'
description = 'Visit BBC News for up-to-the-minute news, breaking news, video, audio and feature stories. BBC News provides trusted World and UK news as well as local and regional perspectives. Also entertainment, business, science, technology and health news.'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
#delay = 1
use_embedded_content = False
encoding = 'utf8'
publisher = 'BBC'
category = 'news, UK, world'
language = 'en_GB'
publication_type = 'newsportal'
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
masthead_url = 'http://news.bbcimg.co.uk/img/1_0_1/cream/hi/news/news-blocks.gif'
extra_css = """
body{ font-family: Verdana,Helvetica,Arial,sans-serif }
.introduction{font-weight: bold}
.story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small}
.story-feature h2{text-align: center; text-transform: uppercase}
"""
conversion_options = {
'comments' : description
,'tags' : category
@ -31,31 +34,54 @@ class BBC(BasicNewsRecipe):
}
keep_only_tags = [
dict(name='div', attrs={'class':['layout-block-a layout-block']})
,dict(attrs={'class':['story-body','storybody']})
dict(name='div', attrs={'class':['layout-block-a layout-block']})
,dict(attrs={'class':['story-body','storybody']})
,dict(attrs={'id':['meta-information','story-body']})
]
remove_tags = [
dict(name='div', attrs={'class':['story-feature related narrow', 'share-help', 'embedded-hyper', \
'story-feature wide ', 'story-feature narrow']})
, dict(name=['img'])
]
dict(name='div', attrs={'class':['story-feature related narrow', \
'share-help', 'embedded-hyper', \
'story-feature wide ', \
'story-feature narrow', \
'hidden','story-actions', \
'embedded-hyper']})
,dict(name=['img','meta','link','object','embed','iframe','base'])
,dict(attrs={'class':['hidden','videoInStoryC']})
,dict(attrs={'id':['bbccom_sponsor_section','toggle-controls', \
'toggle-images','toggle-title']})
]
remove_attributes = ['width','height']
remove_attributes = ['width','height','xmlns:og','lang','clear']
feeds = [
('News Front Page', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/front_page/rss.xml'),
('Science/Nature', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/science/nature/rss.xml'),
('Technology', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/technology/rss.xml'),
('Entertainment', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/entertainment/rss.xml'),
('Magazine', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/uk_news/magazine/rss.xml'),
('Business', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/business/rss.xml'),
('Health', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/health/rss.xml'),
('Americas', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/americas/rss.xml'),
('Europe', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/europe/rss.xml'),
('South Asia', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/south_asia/rss.xml'),
('UK', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/uk_news/rss.xml'),
('Asia-Pacific', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/asia-pacific/rss.xml'),
('Africa', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/africa/rss.xml'),
('Top Stories' , 'http://feeds.bbci.co.uk/news/rss.xml' ),
('Science/Environment', 'http://feeds.bbci.co.uk/news/science_and_environment/rss.xml'),
('Technology' , 'http://feeds.bbci.co.uk/news/technology/rss.xml' ),
('Entertainment/Arts' , 'http://feeds.bbci.co.uk/news/entertainment_and_arts/rss.xml' ),
('Magazine' , 'http://feeds.bbci.co.uk/news/magazine/rss.xml' ),
('Business' , 'http://feeds.bbci.co.uk/news/business/rss.xml' ),
('Politics' , 'http://feeds.bbci.co.uk/news/politics/rss.xml' ),
('Health' , 'http://feeds.bbci.co.uk/news/health/rss.xml' ),
('US&Canada' , 'http://feeds.bbci.co.uk/news/world/us_and_canada/rss.xml' ),
('Latin America' , 'http://feeds.bbci.co.uk/news/world/latin_america/rss.xml' ),
('Europe' , 'http://feeds.bbci.co.uk/news/world/europe/rss.xml' ),
('South Asia' , 'http://feeds.bbci.co.uk/news/world/south_asia/rss.xml' ),
('England' , 'http://feeds.bbci.co.uk/news/england/rss.xml' ),
('Asia-Pacific' , 'http://feeds.bbci.co.uk/news/world/asia_pacific/rss.xml' ),
('Africa' , 'http://feeds.bbci.co.uk/news/world/africa/rss.xml' )
]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll('left'):
item.name='span'
for item in soup.findAll('a'):
if item.string is not None:
str = item.string
item.replaceWith(str)
else:
str = self.tag_to_string(item)
item.replaceWith(str)
return soup

74
recipes/bild_de.recipe Normal file
View File

@ -0,0 +1,74 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.recipes import BasicNewsRecipe
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
title = u'Bild.de'
__author__ = 'schuster'
oldest_article = 1
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
language = 'de'
remove_javascript = True
# get cover from myspace
cover_url = 'http://a3.l3-images.myspacecdn.com/images02/56/0232f842170b4d349779f8379c27e073/l.jpg'
masthead_url = 'http://a3.l3-images.myspacecdn.com/images02/56/0232f842170b4d349779f8379c27e073/l.jpg'
# set what to fetch on the site
remove_tags_before = dict(name = 'h2', attrs={'id':'cover'})
remove_tags_after = dict(name ='div', attrs={'class':'back'})
# remove things on the site that we don't want
remove_tags = [dict(name='div', attrs={'class':'credit'}),
dict(name='div', attrs={'class':'index'}),
dict(name='div', attrs={'id':'zstart31'}),
dict(name='div', attrs={'class':'hentry'}),
dict(name='div', attrs={'class':'back'}),
dict(name='div', attrs={'class':'pagination'}),
dict(name='div', attrs={'class':'header'}),
dict(name='div', attrs={'class':'element floatL'}),
dict(name='div', attrs={'class':'stWrap'})
]
# thanx to kiklop74 for code (see sticky thread -> Recipes - Re-usable code)
# this one removes a lot of direct-link's
def preprocess_html(self, soup):
for alink in soup.findAll('a'):
if alink.string is not None:
tstr = alink.string
alink.replaceWith(tstr)
return soup
# remove the ad's
filter_regexps = [r'.\.smartadserver\.com']
def skip_ad_pages(self, soup):
return None
#get the real url behind .feedsportal.com and fetch the artikels
def get_article_url(self, article):
return article.get('id', article.get('guid', None))
#list of the rss source from www.bild.de
feeds = [(u'Überblick', u'http://rss.bild.de/bild.xml'),
(u'News', u'http://rss.bild.de/bild-news.xml'),
(u'Politik', u'http://rss.bild.de/bild-politik.xml'),
(u'Unterhaltung', u'http://rss.bild.de/bild-unterhaltung.xml'),
(u'Sport', u'http://rss.bild.de/bild-sport.xml'),
(u'Lifestyle', u'http://rss.bild.de/bild-lifestyle.xml'),
(u'Ratgeber', u'http://rss.bild.de/bild-ratgeber.xml'),
(u'Reg. - Berlin', u'http://rss.bild.de/bild-berlin.xml'),
(u'Reg. - Bremen', u'http://rss.bild.de/bild-bremen.xml'),
(u'Reg. - Dresden', u'http://rss.bild.de/bild-dresden.xml'),
(u'Reg. - Düsseldorf', u'http://rss.bild.de/bild-duesseldorf.xml'),
(u'Reg. - Frankfurt-Main', u'http://rss.bild.de/bild-frankfurt-main.xml'),
(u'Reg. - Hamburg', u'http://rss.bild.de/bild-hamburg.xml'),
(u'Reg. - Hannover', u'http://rss.bild.de/bild-hannover.xml'),
(u'Reg. - Köln', u'http://rss.bild.de/bild-koeln.xml'),
(u'Reg. - Leipzig', u'http://rss.bild.de/bild-leipzig.xml'),
(u'Reg. - München', u'http://rss.bild.de/bild-muenchen.xml'),
(u'Reg. - Ruhrgebiet', u'http://rss.bild.de/bild-ruhrgebiet.xml'),
(u'Reg. - Stuttgart', u'http://rss.bild.de/bild-stuttgart.xml')
]

View File

@ -0,0 +1,33 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
title = u'Börse-online'
__author__ = 'schuster'
oldest_article = 1
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
language = 'de'
remove_javascript = True
cover_url = 'http://www.dpv.de/images/1995/source.gif'
masthead_url = 'http://www.zeitschriften-cover.de/cover/boerse-online-cover-januar-2010-x1387.jpg'
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h4{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
img {min-width:300px; max-width:600px; min-height:300px; max-height:800px}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
remove_tags_bevor = [dict(name='h3')]
remove_tags_after = [dict(name='div', attrs={'class':'artikelfuss'})]
remove_tags = [dict(attrs={'class':['moduleTopNav', 'moduleHeaderNav', 'text', 'blau', 'poll1150']}),
dict(id=['newsletterlayer', 'newsletterlayerClose', 'newsletterlayer_body', 'newsletterarray_error', 'newsletterlayer_emailadress', 'newsletterlayer_submit', 'kommentar']),
dict(name=['h2', 'Gesamtranking', 'h3',''])]
def print_version(self, url):
return url.replace('.html#nv=rss', '.html?mode=print')
feeds = [(u'Börsennachrichten', u'http://www.boerse-online.de/rss/')]

61
recipes/capital_de.recipe Normal file
View File

@ -0,0 +1,61 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1305470859(BasicNewsRecipe):
title = u'Capital.de'
language = 'de'
__author__ = 'schuster'
oldest_article =7
max_articles_per_feed = 35
no_stylesheets = True
remove_javascript = True
use_embedded_content = False
masthead_url = 'http://www.wirtschaftsmedien-shop.de/media/stores/wirtschaftsmedien/capital/teaser_large_abo.jpg'
cover_url = 'http://d1kb9jvg6ylufe.cloudfront.net/WebsiteCMS/de/unternehmen/linktipps/mainColumn/08/image/DE_Capital_bis20mm_SW.jpg'
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h4{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
img {min-width:300px; max-width:600px; min-height:300px; max-height:800px}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
def print_version(self, url):
return url.replace ('nv=rss#utm_source=rss2&utm_medium=rss_feed&utm_campaign=/', 'mode=print')
remove_tags_bevor = [dict(name='td', attrs={'class':'textcell'})]
remove_tags_after = [dict(name='div', attrs={'class':'artikelsplit'})]
feeds = [ (u'Wirtschaftsmagazin', u'http://www.capital.de/rss/'),
(u'Unternehmen', u'http://www.capital.de/rss/unternehmen'),
(u'Finanz & Geldanlage', u'http://www.capital.de/rss/finanzen/geldanlage')]
def append_page(self, soup, appendtag, position):
pager = soup.find('div',attrs={'class':'artikelsplit'})
if pager:
nexturl = self.INDEX + pager.a['href']
soup2 = self.index_to_soup(nexturl)
texttag = soup2.find('div', attrs={'class':'printable'})
for it in texttag.findAll(style=True):
del it['style']
newpos = len(texttag.contents)
self.append_page(soup2,texttag,newpos)
texttag.extract()
appendtag.insert(position,texttag)
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll('div', attrs={'class':'artikelsplit'}):
item.extract()
self.append_page(soup, soup.body, 3)
pager = soup.find('div',attrs={'class':'artikelsplit'})
if pager:
pager.extract()
return self.adeify_images(soup)
remove_tags = [dict(attrs={'class':['navSeitenAlle', 'kommentieren', 'teaserheader', 'teasercontent', 'info', 'zwischenhead', 'artikelsplit']}),
dict(id=['topNav', 'mainNav', 'subNav', 'socialmedia', 'footerRahmen', 'gatrixx_marktinformationen', 'pager', 'weitere']),
dict(span=['ratingtext', 'Gesamtranking', 'h3','']),
dict(rel=['canonical'])]

View File

@ -3,71 +3,39 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
Profile to download CNN
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class CNN(BasicNewsRecipe):
title = 'CNN'
description = 'Global news'
timefmt = ' [%d %b %Y]'
__author__ = 'Krittika Goyal and Sujata Raman'
__author__ = 'Kovid Goyal'
language = 'en'
no_stylesheets = True
use_embedded_content = False
oldest_article = 15
recursions = 1
match_regexps = [r'http://sportsillustrated.cnn.com/.*/[1-9].html']
#recursions = 1
#match_regexps = [r'http://sportsillustrated.cnn.com/.*/[1-9].html']
max_articles_per_feed = 25
extra_css = '''
.cnn_strycntntlft{font-family :Arial,Helvetica,sans-serif;}
h2{font-family :Arial,Helvetica,sans-serif; font-size:x-small}
.cnnTxtCmpnt{font-family :Arial,Helvetica,sans-serif; font-size:x-small}
.cnnTMcontent{font-family :Arial,Helvetica,sans-serif; font-size:x-small;color:#575757}
.storytext{font-family :Arial,Helvetica,sans-serif; font-size:small}
.storybyline{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#575757}
.credit{font-family :Arial,Helvetica,sans-serif; font-size:xx-small; color:#575757}
.storyBrandingBanner{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#575757}
.storytimestamp{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#575757}
.timestamp{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#575757}
.cnn_strytmstmp{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#666666;}
.cnn_stryimg640caption{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#666666;}
.cnn_strylccimg300cntr{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#666666;}
.cnn_stryichgfcpt{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#666666;}
.cnnByline{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#666666;}
.cnn_bulletbin cnnStryHghLght{ font-size:xx-small;}
.subhead p{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
.cnnStoryContent{font-family :Arial,Helvetica,sans-serif; font-size:x-small}
.cnnContentContainer{font-family :Arial,Helvetica,sans-serif; font-size:x-small}
.col1{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#666666;}
.col3{color:#333333; font-family :Arial,Helvetica,sans-serif; font-size:x-small;font-weight:bold;}
.cnnInlineT1Caption{font-family :Arial,Helvetica,sans-serif; font-size:x-small;font-weight:bold;}
.cnnInlineT1Credit{font-family :Arial,Helvetica,sans-serif; font-size:x-small;color:#333333;}
.col10{color:#5A637E;}
.cnnInlineRailBulletList{color:black;}
.cnnLine0{font-family :Arial,Helvetica,sans-serif; color:#666666;font-weight:bold;}
.cnnTimeStamp{font-family :Arial,Helvetica,sans-serif; font-size:x-small;color:#333333;}
.galleryhedDek{font-family :Arial,Helvetica,sans-serif; font-size:x-small;color:#575757;}
.galleryWidgetHeader{font-family :Arial,Helvetica,sans-serif; font-size:x-small;color:#004276;}
.article-content{font-family :Arial,Helvetica,sans-serif; font-size:x-small}
.cnnRecapStory{font-family :Arial,Helvetica,sans-serif; font-size:x-small}
h1{font-family :Arial,Helvetica,sans-serif; font-size:x-large}
.captionname{font-family :Arial,Helvetica,sans-serif; font-size:x-small;color:#575757;}
inStoryIE{{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
'''
#remove_tags_before = dict(name='h1', attrs={'class':'heading'})
#remove_tags_after = dict(name='td', attrs={'class':'newptool1'})
remove_tags = [
dict(name='iframe'),
dict(name='div', attrs={'class':['cnnEndOfStory', 'cnnShareThisItem', 'cnn_strylctcntr cnn_strylctcqrelt', 'cnnShareBoxContent', 'cnn_strybtmcntnt', 'cnn_strycntntrgt']}),
dict(name='div', attrs={'id':['IEContainer', 'clickIncludeBox']}),
#dict(name='ul', attrs={'class':'article-tools'}),
#dict(name='ul', attrs={'class':'articleTools'}),
preprocess_regexps = [
(re.compile(r'<!--\[if.*if\]-->', re.DOTALL), lambda m: ''),
(re.compile(r'<script.*?</script>', re.DOTALL), lambda m: ''),
(re.compile(r'<style.*?</style>', re.DOTALL), lambda m: ''),
]
keep_only_tags = [dict(id='cnnContentContainer')]
remove_tags = [
{'class':['cnn_strybtntools', 'cnn_strylftcntnt',
'cnn_strybtntools', 'cnn_strybtntoolsbttm', 'cnn_strybtmcntnt',
'cnn_strycntntrgt']},
]
feeds = [
('Top News', 'http://rss.cnn.com/rss/cnn_topstories.rss'),
('World', 'http://rss.cnn.com/rss/cnn_world.rss'),
@ -84,15 +52,8 @@ class CNN(BasicNewsRecipe):
('Offbeat', 'http://rss.cnn.com/rss/cnn_offbeat.rss'),
('Most Popular', 'http://rss.cnn.com/rss/cnn_mostpopular.rss')
]
def preprocess_html(self, soup):
story = soup.find(name='div', attrs={'class':'cnnBody_Left'})
if story is None:
story = soup.find(name='div', attrs={'id':'cnnContentContainer'})
soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
body = soup.find(name='body')
body.insert(0, story)
else:
soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
body = soup.find(name='body')
body.insert(0, story)
return soup
def get_article_url(self, article):
ans = BasicNewsRecipe.get_article_url(self, article)
return ans.partition('?')[0]

View File

@ -0,0 +1,34 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1305567197(BasicNewsRecipe):
title = u'Cosmopolitan.de'
__author__ = 'schuster'
oldest_article = 7
language = 'de'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
remove_javascript = True
cover_url = 'http://www.cosmopolitan.com/cm/shared/site_images/print_this/cosmopolitan_logo.gif'
remove_tags_before = dict(name = 'h1', attrs={'class':'artikel'})
remove_tags_after = dict(name ='div', attrs={'class':'morePages'})
extra_css = '''
h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small;}
h1{ font-family:Arial,Helvetica,sans-serif; font-size:x-large; font-weight:bold;}
'''
remove_tags = [ dict(id='strong'),
dict(title='strong'),
dict(name='span'),
dict(name='li', attrs={'class':'large'}),
dict(name='ul', attrs={'class':'articleImagesPortrait clearfix'}),
dict(name='p', attrs={'class':'external'}),
dict(name='a', attrs={'target':'_blank'}),]
feeds = [ (u'Komplett', u'http://www.cosmopolitan.de/rss/allgemein.xml'),
(u'Mode', u'http://www.cosmopolitan.de/rss/mode.xml'),
(u'Beauty', u'http://www.cosmopolitan.de/rss/beauty.xml'),
(u'Liebe&Sex', u'http://www.cosmopolitan.de/rss/liebe.xml'),
(u'Psychologie', u'http://www.cosmopolitan.de/rss/psychologie.xml'),
(u'Job&Karriere', u'http://www.cosmopolitan.de/rss/job.xml'),
(u'Lifestyle', u'http://www.cosmopolitan.de/rss/lifestyle.xml'),
(u'Shopping', u'http://www.cosmopolitan.de/rss/shopping.xml'),
(u'Bildergalerien', u'http://www.cosmopolitan.de/rss/bildgalerien.xml')]

View File

@ -0,0 +1,71 @@
from calibre.web.feeds.news import BasicNewsRecipe
class DilemaVeche(BasicNewsRecipe):
title = u'Dilema Veche' # apare vinerea, mai pe dupa-masa,depinde de Luiza cred (care se semneaza ca fiind creatorul fiecarui articol in feed-ul RSS)
__author__ = 'song2' # inspirat din scriptul pentru Le Monde. Inspired from the Le Monde script
description = '"Sint vechi, domnule!" (I.L. Caragiale)'
publisher = 'Adevarul Holding'
oldest_article = 7
max_articles_per_feed = 200
encoding = 'utf8'
language = 'ro'
masthead_url = 'http://www.dilemaveche.ro/sites/all/themes/dilema/theme/dilema_two/layouter/dilema_two_homepage/logo.png'
publication_type = 'magazine'
feeds = [
('Editoriale si opinii - Situatiunea', 'http://www.dilemaveche.ro/taxonomy/term/37/0/feed'),
('Editoriale si opinii - Pe ce lume traim', 'http://www.dilemaveche.ro/taxonomy/term/38/0/feed'),
('Editoriale si opinii - Bordeie si obiceie', 'http://www.dilemaveche.ro/taxonomy/term/44/0/feed'),
('Editoriale si opinii - Talc Show', 'http://www.dilemaveche.ro/taxonomy/term/44/0/feed'),
('Tema saptamanii', 'http://www.dilemaveche.ro/taxonomy/term/19/0/feed'),
('La zi in cultura - Dilema va recomanda', 'http://www.dilemaveche.ro/taxonomy/term/58/0/feed'),
('La zi in cultura - Carte', 'http://www.dilemaveche.ro/taxonomy/term/14/0/feed'),
('La zi in cultura - Film', 'http://www.dilemaveche.ro/taxonomy/term/13/0/feed'),
('La zi in cultura - Muzica', 'http://www.dilemaveche.ro/taxonomy/term/1341/0/feed'),
('La zi in cultura - Arte performative', 'http://www.dilemaveche.ro/taxonomy/term/1342/0/feed'),
('La zi in cultura - Arte vizuale', 'http://www.dilemaveche.ro/taxonomy/term/1512/0/feed'),
('Societate - Ieri cu vedere spre azi', 'http://www.dilemaveche.ro/taxonomy/term/15/0/feed'),
('Societate - Din polul opus', 'http://www.dilemaveche.ro/taxonomy/term/41/0/feed'),
('Societate - Mass comedia', 'http://www.dilemaveche.ro/taxonomy/term/43/0/feed'),
('Societate - La singular si la plural', 'http://www.dilemaveche.ro/taxonomy/term/42/0/feed'),
('Oameni si idei - Educatie', 'http://www.dilemaveche.ro/taxonomy/term/46/0/feed'),
('Oameni si idei - Polemici si dezbateri', 'http://www.dilemaveche.ro/taxonomy/term/48/0/feed'),
('Oameni si idei - Stiinta si tehnologie', 'http://www.dilemaveche.ro/taxonomy/term/46/0/feed'),
('Dileme on-line', 'http://www.dilemaveche.ro/taxonomy/term/005/0/feed')
]
remove_tags_before = dict(name='div',attrs={'class':'spacer_10'})
remove_tags = [
dict(name='div', attrs={'class':'art_related_left'}),
dict(name='div', attrs={'class':'controale'}),
dict(name='div', attrs={'class':'simple_overlay'}),
]
remove_tags_after = [dict(id='facebookLike')]
remove_javascript = True
no_stylesheets = True
remove_empty_feeds = True
extra_css = """
body{font-family: Georgia,Times,serif }
img{margin-bottom: 0.4em; display:block}
"""
def get_cover_url(self):
cover_url = None
soup = self.index_to_soup('http://dilemaveche.ro')
link_item = soup.find('div',attrs={'class':'box_dr_pdf_picture'})
if link_item and link_item.a:
cover_url = link_item.a['href']
br = BasicNewsRecipe.get_browser()
try:
br.open(cover_url)
except: #daca nu gaseste pdf-ul
self.log("\nPDF indisponibil")
link_item = soup.find('div',attrs={'class':'box_dr_pdf_picture'})
if link_item and link_item.img:
cover_url = link_item.img['src']
br = BasicNewsRecipe.get_browser()
try:
br.open(cover_url)
except: #daca nu gaseste nici imaginea mica mica
print('Mama lor de nenorociti! nu este nici pdf nici imagine')
cover_url ='http://www.dilemaveche.ro/sites/all/themes/dilema/theme/dilema_two/layouter/dilema_two_homepage/logo.png'
return cover_url
cover_margins = (10, 15, '#ffffff')

View File

@ -37,7 +37,7 @@ class DN_se(BasicNewsRecipe):
,(u'Kultur' , u'http://www.dn.se/kultur-rss' )
]
keep_only_tags = [dict(name='div', attrs={'id':'article'})]
keep_only_tags = [dict(name='div', attrs={'id':'article-content'})]
remove_tags_before = dict(name='h1')
remove_tags_after = dict(name='div',attrs={'id':'byline'})
remove_tags = [

View File

@ -1,19 +1,21 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1302341394(BasicNewsRecipe):
title = u'DvhN'
oldest_article = 1
__author__ = 'Reijndert'
oldest_article = 7
max_articles_per_feed = 200
__author__ = 'Reijndert'
no_stylesheets = True
cover_url = 'http://www.dvhn.nl/template/Dagblad_v2.0/gfx/logo_DvhN.gif'
cover_url = 'http://members.home.nl/apm.de.haas/calibre/DvhN.jpg'
language = 'nl'
country = 'NL'
version = 1
publisher = u'Dagblad van het Noorden'
category = u'Nieuws'
description = u'Nieuws uit Noord Nederland'
timefmt = ' %Y-%m-%d (%a)'
keep_only_tags = [dict(name='div', attrs={'id':'fullPicture'})
@ -21,11 +23,26 @@ class AdvancedUserRecipe1302341394(BasicNewsRecipe):
]
remove_tags = [
dict(name=['object','link','iframe','base'])
,dict(name='span',attrs={'class':'copyright'})
dict(name='span',attrs={'class':'location'})
]
feeds = [(u'Drenthe', u'http://www.dvhn.nl/nieuws/drenthe/index.jsp?service=rss'), (u'Groningen', u'http://www.dvhn.nl/nieuws/groningen/index.jsp?service=rss'), (u'Nederland', u'http://www.dvhn.nl/nieuws/nederland/index.jsp?service=rss'), (u'Wereld', u'http://www.dvhn.nl/nieuws/wereld/index.jsp?service=rss'), (u'Economie', u'http://www.dvhn.nl/nieuws/economie/index.jsp?service=rss'), (u'Sport', u'http://www.dvhn.nl/nieuws/sport/index.jsp?service=rss'), (u'Cultuur', u'http://www.dvhn.nl/nieuws/kunst/index.jsp?service=rss'), (u'24 Uur', u'http://www.dvhn.nl/nieuws/24uurdvhn/index.jsp?service=rss&selectiontype=last24hours')]
preprocess_regexps = [
(re.compile(r'<a.*?>'), lambda h1: '')
,(re.compile(r'</a>'), lambda h2: '')
,(re.compile(r'Word vriend van Dagblad van het Noorden op Facebook'), lambda h3: '')
,(re.compile(r'Volg Dagblad van het Noorden op Twitter'), lambda h3: '')
]
feeds = [(u'Drenthe', u'http://www.dvhn.nl/nieuws/drenthe/index.jsp?service=rss')
, (u'Groningen', u'http://www.dvhn.nl/nieuws/groningen/index.jsp?service=rss')
, (u'Nederland', u'http://www.dvhn.nl/nieuws/nederland/index.jsp?service=rss')
, (u'Wereld', u'http://www.dvhn.nl/nieuws/wereld/index.jsp?service=rss')
, (u'Economie', u'http://www.dvhn.nl/nieuws/economie/index.jsp?service=rss')
, (u'Sport', u'http://www.dvhn.nl/nieuws/sport/index.jsp?service=rss')
, (u'Cultuur', u'http://www.dvhn.nl/nieuws/kunst/index.jsp?service=rss')
, (u'24 Uur', u'http://www.dvhn.nl/nieuws/24uurdvhn/index.jsp?service=rss&selectiontype=last24hours')
]
extra_css = '''
body {font-family: verdana, arial, helvetica, geneva, sans-serif;}

View File

@ -20,7 +20,7 @@ class Economist(BasicNewsRecipe):
INDEX = 'http://www.economist.com/printedition'
description = ('Global news and current affairs from a European'
' perspective. Best downloaded on Friday mornings (GMT)')
extra_css = '.headline {font-size: x-large;} \n h2 { font-size: small; } \n h1 { font-size: medium; }'
oldest_article = 7.0
cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
remove_tags = [

View File

@ -14,7 +14,7 @@ class Economist(BasicNewsRecipe):
description = ('Global news and current affairs from a European'
' perspective. Best downloaded on Friday mornings (GMT).'
' Much slower than the print edition based version.')
extra_css = '.headline {font-size: x-large;} \n h2 { font-size: small; } \n h1 { font-size: medium; }'
oldest_article = 7.0
cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
remove_tags = [

View File

@ -1,6 +1,6 @@
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
elmundo.es
'''
@ -10,15 +10,24 @@ from calibre.web.feeds.news import BasicNewsRecipe
class ElMundo(BasicNewsRecipe):
title = 'El Mundo'
__author__ = 'Darko Miletic'
description = 'News from Spain'
publisher = 'El Mundo'
description = 'Lider de informacion en espaniol'
publisher = 'Unidad Editorial Informacion General S.L.U.'
category = 'news, politics, Spain'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'iso8859_15'
language = 'es'
language = 'es_ES'
masthead_url = 'http://estaticos03.elmundo.es/elmundo/iconos/v4.x/v4.01/bg_h1.png'
publication_type = 'newspaper'
extra_css = """
body{font-family: Arial,Helvetica,sans-serif}
.metadata_noticia{font-size: small}
h1,h2,h3,h4,h5,h6,.subtitulo {color: #3F5974}
.hora{color: red}
.update{color: gray}
"""
conversion_options = {
'comments' : description
@ -30,22 +39,31 @@ class ElMundo(BasicNewsRecipe):
keep_only_tags = [dict(name='div', attrs={'class':'noticia'})]
remove_tags_before = dict(attrs={'class':['titular','antetitulo'] })
remove_tags_after = dict(name='div' , attrs={'id':['desarrollo_noticia','tamano']})
remove_attributes = ['lang','border']
remove_tags = [
dict(name='div', attrs={'class':['herramientas','publicidad_google']})
,dict(name='div', attrs={'id':'modulo_multimedia' })
,dict(name='ul', attrs={'class':'herramientas' })
,dict(name=['object','link'])
,dict(name=['object','link','embed','iframe','base','meta'])
]
feeds = [
(u'Portada' , u'http://rss.elmundo.es/rss/descarga.htm?data2=4' )
,(u'Deportes' , u'http://rss.elmundo.es/rss/descarga.htm?data2=14')
,(u'Economia' , u'http://rss.elmundo.es/rss/descarga.htm?data2=7' )
,(u'Espana' , u'http://rss.elmundo.es/rss/descarga.htm?data2=8' )
,(u'Internacional' , u'http://rss.elmundo.es/rss/descarga.htm?data2=9' )
,(u'Cultura' , u'http://rss.elmundo.es/rss/descarga.htm?data2=6' )
,(u'Ciencia/Ecologia', u'http://rss.elmundo.es/rss/descarga.htm?data2=5' )
,(u'Comunicacion' , u'http://rss.elmundo.es/rss/descarga.htm?data2=26')
,(u'Television' , u'http://rss.elmundo.es/rss/descarga.htm?data2=76')
(u'Portada' , u'http://estaticos.elmundo.es/elmundo/rss/portada.xml' )
,(u'Deportes' , u'http://estaticos.elmundo.es/elmundodeporte/rss/portada.xml')
,(u'Economia' , u'http://estaticos.elmundo.es/elmundo/rss/economia.xml' )
,(u'Espana' , u'http://estaticos.elmundo.es/elmundo/rss/espana.xml' )
,(u'Internacional' , u'http://estaticos.elmundo.es/elmundo/rss/internacional.xml' )
,(u'Cultura' , u'http://estaticos.elmundo.es/elmundo/rss/cultura.xml' )
,(u'Ciencia/Ecologia', u'http://estaticos.elmundo.es/elmundo/rss/ciencia.xml' )
,(u'Comunicacion' , u'http://estaticos.elmundo.es/elmundo/rss/comunicacion.xml' )
,(u'Television' , u'http://estaticos.elmundo.es/elmundo/rss/television.xml' )
]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup
def get_article_url(self, article):
return article.get('guid', None)

72
recipes/express_de.recipe Normal file
View File

@ -0,0 +1,72 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
title = u'Express.de'
__author__ = 'schuster'
oldest_article = 2
max_articles_per_feed = 50
no_stylesheets = True
use_embedded_content = False
language = 'de'
extra_css = '''
h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small;}
h1{ font-family:Arial,Helvetica,sans-serif; font-size:x-large; font-weight:bold;}
'''
remove_javascript = True
remove_tags_befor = [dict(name='div', attrs={'class':'Datum'})]
remove_tags_after = [dict(name='div', attrs={'class':'MoreNews'})]
remove_tags = [dict(id='kalaydo'),
dict(id='Header'),
dict(id='Searchline'),
dict(id='MainNav'),
dict(id='Logo'),
dict(id='MainLinkSpacer'),
dict(id='MainLinks'),
dict(id='ContainerPfad'), #neu
dict(title='Diese Seite Bookmarken'),
dict(name='span'),
dict(name='div', attrs={'class':'spacer_leftneu'}),
dict(name='div', attrs={'class':'button kalaydologo'}),
dict(name='div', attrs={'class':'button stellenneu'}),
dict(name='div', attrs={'class':'button autoneu'}),
dict(name='div', attrs={'class':'button immobilienneu'}),
dict(name='div', attrs={'class':'button kleinanzeigen'}),
dict(name='div', attrs={'class':'button tiereneu'}),
dict(name='div', attrs={'class':'button ferienwohnungen'}),
dict(name='div', attrs={'class':'button inserierenneu'}),
dict(name='div', attrs={'class':'spacer_rightneu'}),
dict(name='div', attrs={'class':'spacer_rightcorner'}),
dict(name='div', attrs={'class':'HeaderMetaNav'}),
dict(name='div', attrs={'class':'HeaderSearchOption'}),
dict(name='div', attrs={'class':'HeaderSearch'}),
dict(name='div', attrs={'class':'sbutton'}),
dict(name='div', attrs={'class':'active'}),
dict(name='div', attrs={'class':'MoreNews'}), #neu
dict(name='div', attrs={'class':'ContentBoxSubline'}) #neu
]
def preprocess_html(self, soup):
for alink in soup.findAll('a'):
if alink.string is not None:
tstr = alink.string
alink.replaceWith(tstr)
return soup
feeds = [(u'Top-Themen', u'http://www.express.de/home/-/2126/2126/-/view/asFeed/-/index.xml'),
(u'Regional - Köln', u'http://www.express.de/regional/koeln/-/2856/2856/-/view/asFeed/-/index.xml'),
(u'Regional - Bonn', u'http://www.express.de/regional/bonn/-/2860/2860/-/view/asFeed/-/index.xml'),
(u'Regional - Düsseldorf', u'http://www.express.de/regional/duesseldorf/-/2858/2858/-/view/asFeed/-/index.xml'),
(u'Regional - Region', u'http://www.express.de/regional/-/2178/2178/-/view/asFeed/-/index.xml'),
(u'Sport-News', u'http://www.express.de/sport/-/2176/2176/-/view/asFeed/-/index.xml'),
(u'Fussball-News', u'http://www.express.de/sport/fussball/-/3186/3186/-/view/asFeed/-/index.xml'),
(u'1.FC Köln News', u'http://www.express.de/sport/fussball/fc-koeln/-/3192/3192/-/view/asFeed/-/index.xml'),
(u'Alemannia Aachen News', u'http://www.express.de/sport/fussball/alemannia/-/3290/3290/-/view/asFeed/-/index.xml'),
(u'Borussia M~Gladbach', u'http://www.express.de/sport/fussball/gladbach/-/3286/3286/-/view/asFeed/-/index.xml'),
(u'Fortuna D~Dorf', u'http://www.express.de/sport/fussball/fortuna/-/3292/3292/-/view/asFeed/-/index.xml'),
(u'Basketball News', u'http://www.express.de/sport/basketball/-/3190/3190/-/view/asFeed/-/index.xml'),
(u'Big Brother', u'http://www.express.de/news/promi-show/big-brother/-/2402/2402/-/view/asFeed/-/index.xml'),
]

View File

@ -1,16 +1,16 @@
__license__ = 'GPL v3'
__copyright__ = '2008-2009, Kovid Goyal <kovid at kovidgoyal.net>, Darko Miletic <darko at gmail.com>'
__copyright__ = '2008-2011, Kovid Goyal <kovid at kovidgoyal.net>, Darko Miletic <darko at gmail.com>'
'''
Profile to download FAZ.net
Profile to download FAZ.NET
'''
from calibre.web.feeds.news import BasicNewsRecipe
class FazNet(BasicNewsRecipe):
title = 'FAZ NET'
title = 'FAZ.NET'
__author__ = 'Kovid Goyal, Darko Miletic'
description = 'Frankfurter Allgemeine Zeitung'
publisher = 'FAZ Electronic Media GmbH'
publisher = 'Frankfurter Allgemeine Zeitung GmbH'
category = 'news, politics, Germany'
use_embedded_content = False
language = 'de'
@ -32,15 +32,27 @@ class FazNet(BasicNewsRecipe):
remove_tags = [
dict(name=['object','link','embed','base'])
,dict(name='div', attrs={'class':['LinkBoxModulSmall','ModulVerlagsInfo']})
,dict(name='div',
attrs={'class':['LinkBoxModulSmall','ModulVerlagsInfo',
'ArtikelServices', 'ModulLesermeinungenFooter',
'ModulArtikelServices', 'BoxTool Aufklappen_Grau',
'SocialMediaUnten', ]}),
dict(id=['KurzLinkMenu', 'ArtikelServicesMenu']),
]
feeds = [ ('FAZ.NET', 'http://www.faz.net/s/Rub/Tpl~Epartner~SRss_.xml') ]
def print_version(self, url):
article, sep, rest = url.partition('?')
return article.replace('.html', '~Afor~Eprint.html')
feeds = [
('FAZ.NET Aktuell', 'http://www.faz.net/s/RubF3CE08B362D244869BE7984590CB6AC1/Tpl~Epartner~SRss_.xml'),
('Politik', 'http://www.faz.net/s/RubA24ECD630CAE40E483841DB7D16F4211/Tpl~Epartner~SRss_.xml'),
('Wirtschaft', 'http://www.faz.net/s/RubC9401175958F4DE28E143E68888825F6/Tpl~Epartner~SRss_.xml'),
('Feuilleton', 'http://www.faz.net/s/RubCC21B04EE95145B3AC877C874FB1B611/Tpl~Epartner~SRss_.xml'),
('Sport', 'http://www.faz.net/s/Rub9F27A221597D4C39A82856B0FE79F051/Tpl~Epartner~SRss_.xml'),
('Gesellschaft', 'http://www.faz.net/s/Rub02DBAA63F9EB43CEB421272A670A685C/Tpl~Epartner~SRss_.xml'),
('Finanzen', 'http://www.faz.net/s/Rub4B891837ECD14082816D9E088A2D7CB4/Tpl~Epartner~SRss_.xml'),
('Wissen', 'http://www.faz.net/s/Rub7F4BEE0E0C39429A8565089709B70C44/Tpl~Epartner~SRss_.xml'),
('Reise', 'http://www.faz.net/s/RubE2FB5CA667054BDEA70FB3BC45F8D91C/Tpl~Epartner~SRss_.xml'),
('Technik & Motor', 'http://www.faz.net/s/Rub01E4D53776494844A85FDF23F5707AD8/Tpl~Epartner~SRss_.xml'),
('Beruf & Chance', 'http://www.faz.net/s/RubB1E10A8367E8446897468EDAA6EA0504/Tpl~Epartner~SRss_.xml')
]
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'

48
recipes/focus_de.recipe Normal file
View File

@ -0,0 +1,48 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1305567197(BasicNewsRecipe):
title = u'Focus (DE)'
__author__ = 'Anonymous'
language = 'de'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
remove_javascript = True
def print_version(self, url):
return url + '?drucken=1'
keep_only_tags = [
dict(name='div', attrs={'id':['article']}) ]
remove_tags = [dict(name='div', attrs={'class':'sidebar'}),
dict(name='div', attrs={'class':'commentForm'}),
dict(name='div', attrs={'class':'comment clearfix oid-3534591 open'}),
dict(name='div', attrs={'class':'similarityBlock'}),
dict(name='div', attrs={'class':'footer'}),
dict(name='div', attrs={'class':'getMoreComments'}),
dict(name='div', attrs={'class':'moreComments'}),
dict(name='div', attrs={'class':'ads'}),
dict(name='div', attrs={'class':'articleContent'}),
]
remove_tags_after = [
dict(name='div',attrs={'class':['commentForm','title', 'actions clearfix']})
]
feeds = [ (u'Eilmeldungen', u'http://rss2.focus.de/c/32191/f/533875/index.rss'),
(u'Auto-News', u'http://rss2.focus.de/c/32191/f/443320/index.rss'),
(u'Digital-News', u'http://rss2.focus.de/c/32191/f/443315/index.rss'),
(u'Finanzen-News', u'http://rss2.focus.de/c/32191/f/443317/index.rss'),
(u'Gesundheit-News', u'http://rss2.focus.de/c/32191/f/443314/index.rss'),
(u'Immobilien-News', u'http://rss2.focus.de/c/32191/f/443318/index.rss'),
(u'Kultur-News', u'http://rss2.focus.de/c/32191/f/443321/index.rss'),
(u'Panorama-News', u'http://rss2.focus.de/c/32191/f/533877/index.rss'),
(u'Politik-News', u'http://rss2.focus.de/c/32191/f/443313/index.rss'),
(u'Reisen-News', u'http://rss2.focus.de/c/32191/f/443316/index.rss'),
(u'Sport-News', u'http://rss2.focus.de/c/32191/f/443319/index.rss'),
(u'Wissen-News', u'http://rss2.focus.de/c/32191/f/533876/index.rss'),
]

View File

@ -0,0 +1,35 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class AdvancedUserRecipe(BasicNewsRecipe):
title = u'Frankfurter Rundschau'
__author__ = 'schuster'
oldest_article = 1
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
language = 'de'
remove_javascript = True
cover_url = 'http://www.fr-online.de/image/view/-/1474018/data/823538/-/logo.png'
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h4{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
img {min-width:300px; max-width:600px; min-height:300px; max-height:800px}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
feeds = [(u'Startseite', u'http://www.fr-online.de/home/-/1472778/1472778/-/view/asFeed/-/index.xml'),
(u'Politik', u'http://www.fr-online.de/politik/-/1472596/1472596/-/view/asFeed/-/index.xml'),
(u'Meinungen', u'http://www.fr-online.de/politik/meinung/-/1472602/1472602/-/view/asFeed/-/index.xml'),
(u'Wirtschaft', u'http://www.fr-online.de/wirtschaft/-/1472780/1472780/-/view/asFeed/-/index.xml'),
(u'Sport', u'http://www.fr-online.de/sport/-/1472784/1472784/-/view/asFeed/-/index.xml'),
(u'Kultur', u'http://www.fr-online.de/kultur/-/1472786/1472786/-/view/asFeed/-/index.xml'),
(u'Panorama', u'http://www.fr-online.de/panorama/-/1472782/1472782/-/view/asFeed/-/index.xml'),
(u'Digital', u'http://www.fr-online.de/digital/-/1472406/1472406/-/view/asFeed/-/index.xml'),
(u'Wissenschaft', u'http://www.fr-online.de/wissenschaft/-/1472788/1472788/-/view/asFeed/-/index.xml')
]
def print_version(self, url):
return url.replace('index.html', 'view/printVersion/-/index.html')

38
recipes/glamour.recipe Normal file
View File

@ -0,0 +1,38 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1305547242(BasicNewsRecipe):
title = u'Glamour (US)'
oldest_article = 21
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
language = 'en'
remove_javascript = True
__author__ = 'Anonymous'
remove_tags = [dict(name='div', attrs={'class':'articles_footer', 'class':'printoptions'})]
def print_version(self, url):
return url + '?printable=true'
def preprocess_html(self, soup):
for alink in soup.findAll('a'):
if alink.string is not None:
tstr = alink.string
alink.replaceWith(tstr)
return soup
feeds = [ (u'All Fashion', u'http://feeds.glamour.com/glamour/all_fashion'),
(u'All Beauty', u'http://feeds.glamour.com/glamour/all_beauty'),
(u'All Sex, Love & Life', u'http://feeds.glamour.com/glamour/sex_love_life'),
(u'All Health & Fitness', u'http://feeds.glamour.com/glamour/health_fitness'),
(u'Shopping', u'http://feeds.glamour.com/glamour/shopping'),
(u'Slaves to Fashion blog', u'http://feeds.glamour.com/glamour/slavestofashion'),
(u'The Girls in the Beauty Department', u'http://feeds.glamour.com/glamour/thegirlsinthebeautydepartment'),
(u'Smitten blog', u'http://feeds.glamour.com/glamour/smitten'),
(u'Save the Date', u'http://feeds.feedburner.com/glamour/save-the-date'),
(u'Single-ish blog', u'http://feeds.glamour.com/glamour/glamoursingle-ish'),
(u'Save the Date', u'http://feeds.feedburner.com/glamour/save-the-date'),
(u'Vitamin G blog', u'http://feeds.glamour.com/glamour/vitamin-g'),
(u'Margarita Shapes Up blog', u'http://feeds.glamour.com/glamour/margaritashapesup'),
(u'Little Miss Fortune blog', u'http://feeds.glamour.com/glamour/little-miss-fortune'),
]

View File

@ -6,13 +6,13 @@ __copyright__ = 'Copyright 2010 Starson17'
www.gocomics.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
import mechanize
import mechanize, re
class GoComics(BasicNewsRecipe):
title = 'GoComics'
__author__ = 'Starson17'
__version__ = '1.03'
__date__ = '09 October 2010'
__version__ = '1.05'
__date__ = '19 may 2011'
description = u'200+ Comics - Customize for more days/comics: Defaults to 7 days, 25 comics - 20 general, 5 editorial.'
category = 'news, comics'
language = 'en'
@ -20,6 +20,7 @@ class GoComics(BasicNewsRecipe):
no_stylesheets = True
remove_javascript = True
cover_url = 'http://paulbuckley14059.files.wordpress.com/2008/06/calvin-and-hobbes.jpg'
remove_attributes = ['style']
####### USER PREFERENCES - COMICS, IMAGE SIZE AND NUMBER OF COMICS TO RETRIEVE ########
# num_comics_to_get - I've tried up to 99 on Calvin&Hobbes
@ -40,6 +41,8 @@ class GoComics(BasicNewsRecipe):
remove_tags = [dict(name='a', attrs={'class':['beginning','prev','cal','next','newest']}),
dict(name='div', attrs={'class':['tag-wrapper']}),
dict(name='a', attrs={'href':re.compile(r'.*mutable_[0-9]+', re.IGNORECASE)}),
dict(name='img', attrs={'src':re.compile(r'.*mutable_[0-9]+', re.IGNORECASE)}),
dict(name='ul', attrs={'class':['share-nav','feature-nav']}),
]

View File

@ -1,83 +1,70 @@
#!/usr/bin/env python
from calibre.web.feeds.recipes import BasicNewsRecipe
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
from calibre.web.feeds.news import BasicNewsRecipe
class golem_ger(BasicNewsRecipe):
title = u'Golem.de'
language = 'de'
__author__ = 'Kovid Goyal'
__author__ = 'schuster'
oldest_article = 7
max_articles_per_feed = 100
language = 'de'
lang = 'de-DE'
no_stylesheets = True
encoding = 'iso-8859-1'
recursions = 1
match_regexps = [r'http://www.golem.de/.*.html']
keep_only_tags = [
dict(name='h1', attrs={'class':'artikelhead'}),
dict(name='p', attrs={'class':'teaser'}),
dict(name='div', attrs={'class':'artikeltext'}),
dict(name='h2', attrs={'id':'artikelhead'}),
]
remove_tags = [
dict(name='div', attrs={'id':['similarContent','topContentWrapper','storycarousel','aboveFootPromo','comments','toolbar','breadcrumbs','commentlink','sidebar','rightColumn']}),
dict(name='div', attrs={'class':['gg_embeddedSubText','gg_embeddedIndex gg_solid','gg_toOldGallery','golemGallery']}),
dict(name='img', attrs={'class':['gg_embedded','gg_embeddedIconRight gg_embeddedIconFS gg_cursorpointer']}),
dict(name='td', attrs={'class':['xsmall']}),
]
# remove_tags_after = [
# dict(name='div', attrs={'id':['contentad2']})
# ]
feeds = [
(u'Golem.de', u'http://rss.golem.de/rss.php?feed=ATOM1.0'),
(u'Audio/Video', u'http://rss.golem.de/rss.php?tp=av&feed=RSS2.0'),
(u'Foto', u'http://rss.golem.de/rss.php?tp=foto&feed=RSS2.0'),
(u'Games', u'http://rss.golem.de/rss.php?tp=games&feed=RSS2.0'),
(u'Internet', u'http://rss.golem.de/rss.php?tp=inet&feed=RSS1.0'),
(u'Mobil', u'http://rss.golem.de/rss.php?tp=mc&feed=ATOM1.0'),
(u'Internet', u'http://rss.golem.de/rss.php?tp=inet&feed=RSS1.0'),
(u'Politik/Recht', u'http://rss.golem.de/rss.php?tp=pol&feed=ATOM1.0'),
(u'Desktop-Applikationen', u'http://rss.golem.de/rss.php?tp=apps&feed=RSS2.0'),
(u'Software-Entwicklung', u'http://rss.golem.de/rss.php?tp=dev&feed=RSS2.0'),
(u'Wirtschaft', u'http://rss.golem.de/rss.php?tp=wirtschaft&feed=RSS2.0'),
(u'Hardware', u'http://rss.golem.de/rss.php?r=hw&feed=RSS2.0'),
(u'Software', u'http://rss.golem.de/rss.php?r=sw&feed=RSS2.0'),
(u'Networld', u'http://rss.golem.de/rss.php?r=nw&feed=RSS2.0'),
(u'Entertainment', u'http://rss.golem.de/rss.php?r=et&feed=RSS2.0'),
(u'TK', u'http://rss.golem.de/rss.php?r=tk&feed=RSS2.0'),
(u'E-Commerce', u'http://rss.golem.de/rss.php?r=ec&feed=RSS2.0'),
(u'Unternehmen/Maerkte', u'http://rss.golem.de/rss.php?r=wi&feed=RSS2.0')
]
feeds = [
(u'Golem.de', u'http://rss.golem.de/rss.php?feed=ATOM1.0'),
(u'Mobil', u'http://rss.golem.de/rss.php?tp=mc&feed=feed=RSS2.0'),
(u'OSS', u'http://rss.golem.de/rss.php?tp=oss&feed=RSS2.0'),
(u'Politik/Recht', u'http://rss.golem.de/rss.php?tp=pol&feed=RSS2.0'),
(u'Desktop-Applikationen', u'http://rss.golem.de/rss.php?tp=apps&feed=RSS2.0'),
(u'Software-Entwicklung', u'http://rss.golem.de/rss.php?tp=dev&feed=RSS2.0'),
]
max_articles_per_feed = 10
no_stylesheets = True
use_embedded_content = False
language = 'de'
cover_url = 'http://www.e-energy.de/images/logo_golem.jpg'
masthead_url = 'http://www.golem.de/staticrl/images/logo.png'
extra_css = '''
h1 {color:#0066CC;font-family:Arial,Helvetica,sans-serif; font-size:30px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:20px;margin-bottom:2 em;}
h2 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:22px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:x-small; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal; line-height:5px;}
h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:13px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:13px; }
h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:11px; text-transform:uppercase;}
.teaser {font-style:italic;font-size:12pt;margin-bottom:15pt;}
.xsmall{font-style:italic;font-size:x-small;}
.td{font-style:italic;font-size:x-small;}
img {align:left;}
h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small;}
h1{ font-family:Arial,Helvetica,sans-serif; font-size:x-large; font-weight:bold;}
'''
remove_javascript = True
remove_tags_befor = [dict(name='header', attrs={'class':'cluster-header'})]
remove_tags_after = [dict(name='p', attrs={'class':'meta'})]
remove_tags = [dict(rel='nofollow'),
dict(name='header', attrs={'id':'header'}),
dict(name='div', attrs={'class':'dh1'}),
dict(name='label', attrs={'class':'implied'}),
dict(name='section', attrs={'id':'comments'}),
dict(name='li', attrs={'class':'gg_prebackcounterItem'}),
dict(name='li', attrs={'class':'gg_prebackcounterItem gg_embeddedIndexCounter'}),
dict(name='img', attrs={'class':'gg_embeddedIconRight gg_embeddedIconFS gg_cursorpointer'}),
dict(name='div', attrs={'target':'_blank'})
]
def get_browser(self, *args, **kwargs):
from calibre import browser
kwargs['user_agent'] = 'mozilla'
return browser(*args, **kwargs)
def get_article_url(self, article):
return article.get('id', article.get('guid', None))
def preprocess_html(self, soup):
for alink in soup.findAll('a'):
if alink.string is not None:
tstr = alink.string
alink.replaceWith(tstr)
return soup
feeds = [(u'Audio/Video', u'http://rss.golem.de/rss.php?tp=av&feed=RSS2.0'),
(u'Foto', u'http://rss.golem.de/rss.php?tp=foto&feed=RSS2.0'),
(u'Games', u'http://rss.golem.de/rss.php?tp=games&feed=RSS2.0'),
(u'Handy', u'http://rss.golem.de/rss.php?tp=handy&feed=RSS2.0'),
(u'Internet', u'http://rss.golem.de/rss.php?tp=inet&feed=RSS2.0'),
(u'Mobile', u'http://rss.golem.de/rss.php?tp=mc&feed=RSS2.0'),
(u'OSS', u'http://rss.golem.de/rss.php?tp=oss&feed=RSS2.0'),
(u'Politik/Recht', u'http://rss.golem.de/rss.php?tp=pol&feed=RSS2.0'),
(u'Security', u'http://rss.golem.de/rss.php?tp=sec&feed=RSS2.0'),
(u'Desktop-Applikationen', u'http://rss.golem.de/rss.php?tp=apps&feed=RSS2.0'),
(u'Software-Entwicklung', u'http://rss.golem.de/rss.php?tp=dev&feed=RSS2.0'),
(u'Wirtschaft', u'http://rss.golem.de/rss.php?tp=wirtschaft&feed=RSS2.0'),
(u'Hardware', u'http://rss.golem.de/rss.php?r=hw&feed=RSS2.0'),
(u'Software', u'http://rss.golem.de/rss.php?r=sw&feed=RSS2.0'),
(u'Networld', u'http://rss.golem.de/rss.php?r=nw&feed=RSS2.0'),
(u'Entertainment', u'http://rss.golem.de/rss.php?r=et&feed=RSS2.0'),
(u'TK', u'http://rss.golem.de/rss.php?r=tk&feed=RSS2.0'),
(u'Wirtschaft', u'http://rss.golem.de/rss.php?r=wi&feed=RSS2.0'),
(u'E-Commerce', u'http://rss.golem.de/rss.php?r=ec&feed=RSS2.0')
]

View File

@ -0,0 +1,31 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1305547242(BasicNewsRecipe):
title = u'Good House Keeping'
language = 'en'
__author__ = 'Anonymous'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
remove_javascript = True
def print_version(self,url):
segments = url.split('/')
printURL = '/'.join(segments[0:3]) + '/print-this/' + '/'.join(segments[4:])
return printURL
def preprocess_html(self, soup):
for alink in soup.findAll('a'):
if alink.string is not None:
tstr = alink.string
alink.replaceWith(tstr)
return soup
feeds = [ (u'Recipes & Entertaining', u'http://www.goodhousekeeping.com/food/food-rss/?src=rss'),
(u'Home & House', u'http://www.goodhousekeeping.com/home/home-rss/?src=rss'),
(u'Diet & Health', u'http://www.goodhousekeeping.com/health/health-rss/?src=rss'),
(u'Beauty & Style', u'http://www.goodhousekeeping.com/beauty/beauty-rss/?src=rss'),
(u'Family & Pets', u'http://www.goodhousekeeping.com/family/family-rss/?src=rss'),
(u'Saving Money', u'http://www.goodhousekeeping.com/money/money-rss/?src=rss'),
]

View File

@ -0,0 +1,32 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1305547242(BasicNewsRecipe):
title = u'Good to Know (uk)'
oldest_article = 14
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
remove_javascript = True
__author__ = 'Anonymous'
language = 'en_GB'
remove_tags = [dict(name='div', attrs={'class':'articles_footer', 'class':'printoptions'})]
def print_version(self, url):
return url + '/print/1'
def preprocess_html(self, soup):
for alink in soup.findAll('a'):
if alink.string is not None:
tstr = alink.string
alink.replaceWith(tstr)
return soup
feeds = [ (u'Family Conception Advice', u'http://www.goodtoknow.co.uk/feeds/family.rss'),
(u'Family Health Advice', u'http://www.goodtoknow.co.uk/feeds/health.rss'),
(u'Diet Advice', u'http://www.goodtoknow.co.uk/feeds/diet.rss'),
(u'Food Advice', u'http://www.goodtoknow.co.uk/feeds/food.rss'),
(u'Sex Advice', u'http://www.goodtoknow.co.uk/feeds/sex.rss'),
(u'Easy Exercise', u'http://www.goodtoknow.co.uk/feeds/easyexercise.rss'),
(u'Recipes', u'http://www.goodtoknow.co.uk/feeds/recipes.rss'),
(u'Food Quick-tips', u'http://www.goodtoknow.co.uk/feeds/foodquicktips.rss'),
]

36
recipes/grrm.recipe Normal file
View File

@ -0,0 +1,36 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
grrm.livejournal.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class NotABlog(BasicNewsRecipe):
title = 'Not A Blog - George R.R. Martin'
__author__ = 'Darko Miletic'
description = 'George R.R. Martin'
oldest_article = 15
max_articles_per_feed = 100
language = 'en'
encoding = 'utf-8'
no_stylesheets = True
use_embedded_content = True
publication_type = 'blog'
conversion_options = {
'comment' : description
, 'tags' : 'sf, fantasy, game of thrones'
, 'publisher': 'George R.R. Martin'
, 'language' : language
}
feeds = [(u'Posts', u'http://grrm.livejournal.com/data/rss')]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return self.adeify_images(soup)

View File

@ -87,7 +87,13 @@ class Guardian(BasicNewsRecipe):
idx = soup.find('div', id='book-index')
for s in idx.findAll('strong', attrs={'class':'book'}):
a = s.find('a', href=True)
yield (self.tag_to_string(a), a['href'])
section_title = self.tag_to_string(a)
if not section_title in self.ignore_sections:
prefix = ''
if section_title != 'Main section':
prefix = section_title + ': '
for subsection in s.parent.findAll('a', attrs={'class':'book-section'}):
yield (prefix + self.tag_to_string(subsection), subsection['href'])
def find_articles(self, url):
soup = self.index_to_soup(url)
@ -114,10 +120,7 @@ class Guardian(BasicNewsRecipe):
try:
feeds = []
for title, href in self.find_sections():
if not title in self.ignore_sections:
feeds.append((title, list(self.find_articles(href))))
feeds.append((title, list(self.find_articles(href))))
return feeds
except:
raise NotImplementedError

View File

@ -0,0 +1,52 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe(BasicNewsRecipe):
title = 'Heise-online'
description = 'News vom Heise-Verlag'
__author__ = 'schuster'
use_embedded_content = False
language = 'de'
oldest_article = 2
max_articles_per_feed = 35
rescale_images = True
remove_empty_feeds = True
timeout = 5
no_stylesheets = True
remove_tags_after = dict(name ='p', attrs={'class':'editor'})
remove_tags = [dict(id='navi_top_container'),
dict(id='navi_bottom'),
dict(id='mitte_rechts'),
dict(id='navigation'),
dict(id='subnavi'),
dict(id='social_bookmarks'),
dict(id='permalink'),
dict(id='content_foren'),
dict(id='seiten_navi'),
dict(id='adbottom'),
dict(id='sitemap')]
feeds = [
('Newsticker', 'http://www.heise.de/newsticker/heise.rdf'),
('Auto', 'http://www.heise.de/autos/rss/news.rdf'),
('Foto ', 'http://www.heise.de/foto/rss/news-atom.xml'),
('Mac&i', 'http://www.heise.de/mac-and-i/news.rdf'),
('Mobile ', 'http://www.heise.de/mobil/newsticker/heise-atom.xml'),
('Netz ', 'http://www.heise.de/netze/rss/netze-atom.xml'),
('Open ', 'http://www.heise.de/open/news/news-atom.xml'),
('Resale ', 'http://www.heise.de/resale/rss/resale.rdf'),
('Security ', 'http://www.heise.de/security/news/news-atom.xml'),
('C`t', 'http://www.heise.de/ct/rss/artikel-atom.xml'),
('iX', 'http://www.heise.de/ix/news/news.rdf'),
('Mach-flott', 'http://www.heise.de/mach-flott/rss/mach-flott-atom.xml'),
('Blog: Babel-Bulletin', 'http://www.heise.de/developer/rss/babel-bulletin/blog.rdf'),
('Blog: Der Dotnet-Doktor', 'http://www.heise.de/developer/rss/dotnet-doktor/blog.rdf'),
('Blog: Bernds Management-Welt', 'http://www.heise.de/developer/rss/bernds-management-welt/blog.rdf'),
('Blog: IT conversation', 'http://www.heise.de/developer/rss/world-of-it/blog.rdf'),
('Blog: Kais bewegtes Web', 'http://www.heise.de/developer/rss/kais-bewegtes-web/blog.rdf')
]
def print_version(self, url):
return url + '?view=print'

Binary file not shown.

After

Width:  |  Height:  |  Size: 558 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 550 B

After

Width:  |  Height:  |  Size: 1.1 KiB

BIN
recipes/icons/marca.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 293 B

BIN
recipes/icons/natgeo.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 247 B

BIN
recipes/icons/osnews_pl.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1006 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 722 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 425 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 925 B

BIN
recipes/icons/wash_post.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

32
recipes/impulse_de.recipe Normal file
View File

@ -0,0 +1,32 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1305470859(BasicNewsRecipe):
title = u'Impulse.de'
language = 'de'
__author__ = 'schuster'
oldest_article =14
max_articles_per_feed = 100
no_stylesheets = True
remove_javascript = True
use_embedded_content = False
cover_url = 'http://www.bvk.de/files/image/bilder/Logo%20Impulse.jpg'
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h4{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
img {min-width:300px; max-width:600px; min-height:300px; max-height:800px}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
def print_version(self, url):
return url.replace ('#utm_source=rss2&utm_medium=rss_feed&utm_campaign=/', '?mode=print')
remove_tags_bevor = [dict(name='h1', attrs={'class':'h2'})]
remove_tags_after = [dict(name='div', attrs={'class':'artikelfuss'})]
feeds = [ (u'impulstest', u'http://www.impulse.de/rss/')]
remove_tags = [dict(attrs={'class':['navSeitenAlle', 'kommentieren', 'teaserheader', 'teasercontent', 'info', 'zwischenhead', 'kasten_artikel']}),
dict(id=['metaNav', 'impKopf', 'impTopNav', 'impSubNav', 'footerRahmen', 'gatrixx_marktinformationen', 'pager', 'weitere', 'socialmedia', 'rating_open']),
dict(span=['ratingtext', 'Gesamtranking', 'h3','']),
dict(rel=['canonical'])]

View File

@ -3,8 +3,9 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1295262156(BasicNewsRecipe):
title = u'kath.net'
__author__ = 'Bobus'
description = u'Katholische Nachrichten'
oldest_article = 7
language = 'en'
language = 'de'
max_articles_per_feed = 100
feeds = [(u'kath.net', u'http://www.kath.net/2005/xml/index.xml')]

View File

@ -5,14 +5,16 @@ class Kathimerini(BasicNewsRecipe):
__author__ = 'Pan'
description = 'News from Greece'
max_articles_per_feed = 100
oldest_article = 100
oldest_article = 100
publisher = 'Kathimerini'
category = 'news, GR'
language = 'el'
encoding = 'windows-1253'
conversion_options = { 'linearize_tables': True}
no_stylesheets = True
remove_tags_before = dict(name='td',attrs={'class':'news'})
remove_tags_after = dict(name='td',attrs={'class':'news'})
remove_attributes = ['width', 'src','header','footer']
remove_tags_before = dict(name='td',attrs={'class':'news'})
remove_tags_after = dict(name='td',attrs={'class':'news'})
remove_attributes = ['width', 'src','header','footer']
feeds = [(u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03ba\u03ae',
'http://wk.kathimerini.gr/xml_files/politics.xml'),
@ -34,4 +36,3 @@ class Kathimerini(BasicNewsRecipe):
def print_version(self, url):
return url.replace('http://news.kathimerini.gr/4dcgi/', 'http://news.kathimerini.gr/4dcgi/4dcgi/')

View File

@ -1,14 +1,11 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.marca.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
class Marca(BasicNewsRecipe):
title = 'Marca'
@ -22,35 +19,30 @@ class Marca(BasicNewsRecipe):
use_embedded_content = False
delay = 1
encoding = 'iso-8859-15'
language = 'es'
language = 'es_ES'
publication_type = 'newsportal'
masthead_url = 'http://estaticos.marca.com/deporte/img/v3.0/img_marca-com.png'
extra_css = """
body{font-family: Tahoma,Geneva,sans-serif}
h1,h2,h3,h4,h5,h6{font-family: 'LatoBlack',Tahoma,Geneva,sans-serif}
.cab_articulo h4 {font-family: Georgia,"Times New Roman",Times,serif}
.antetitulo{text-transform: uppercase}
"""
direction = 'ltr'
feeds = [(u'Portada', u'http://estaticos.marca.com/rss/portada.xml')]
html2lrf_options = [
'--comment' , description
, '--category' , category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
feeds = [(u'Portada', u'http://rss.marca.com/rss/descarga.htm?data2=425')]
keep_only_tags = [dict(name='div', attrs={'class':['cab_articulo','col_izq']})]
remove_tags = [
dict(name=['object','link','script'])
,dict(name='div', attrs={'class':['colC','peu']})
,dict(name='div', attrs={'class':['utilidades estirar','bloque_int_corr estirar']})
keep_only_tags = [dict(name='div', attrs={'class':['cab_articulo','cuerpo_articulo']})]
remove_attributes = ['lang']
remove_tags = [
dict(name=['object','link','script','embed','iframe','meta','base'])
,dict(name='div', attrs={'class':'tabs'})
]
remove_tags_after = [dict(name='div', attrs={'class':'bloque_int_corr estirar'})]
def preprocess_html(self, soup):
soup.html['dir' ] = self.direction
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
soup.head.insert(0,mcharset)
for item in soup.findAll(style=True):
del item['style']
return soup
def get_article_url(self, article):
return article.get('guid', None)

24
recipes/max_planck.recipe Normal file
View File

@ -0,0 +1,24 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
title = u'Max-Planck-Inst.'
__author__ = 'schuster'
oldest_article = 30
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
language = 'de'
remove_javascript = True
remove_tags = [dict(attrs={'class':['box_url', 'print_kontakt']}),
dict(id=['skiplinks'])]
def print_version(self, url):
split_url = url.split("/")
print_url = 'http://www.mpg.de/print/' + split_url[3]
return print_url
feeds = [(u'Forschung', u'http://www.mpg.de/de/forschung.rss')]

View File

@ -71,7 +71,7 @@ class Mediapart(BasicNewsRecipe):
br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:
br.open('http://www.mediapart.fr/')
br.select_form(nr=1)
br.select_form(nr=0)
br['name'] = self.username
br['pass'] = self.password
br.submit()

View File

@ -0,0 +1,10 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1305636254(BasicNewsRecipe):
title = u'Mens Health (US)'
language = 'en'
__author__ = 'Anonymous'
oldest_article = 14
max_articles_per_feed = 100
feeds = [(u'News', u'http://blogs.menshealth.com/health-headlines/feed')]

29
recipes/metro_uk.recipe Normal file
View File

@ -0,0 +1,29 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
title = u'Metro UK'
no_stylesheets = True
oldest_article = 1
max_articles_per_feed = 200
__author__ = 'Dave Asbury'
language = 'en_GB'
simultaneous_downloads= 3
masthead_url = 'http://e-edition.metro.co.uk/images/metro_logo.gif'
keep_only_tags = [
dict(attrs={'class':['img-cnt figure']}),
dict(attrs={'class':['art-img']}),
dict(name='h1'),
dict(name='h2', attrs={'class':'h2'}),
dict(name='div', attrs={'class':'art-lft'})
]
remove_tags = [dict(name='div', attrs={'class':[ 'metroCommentFormWrap',
'commentForm', 'metroCommentInnerWrap',
'art-rgt','pluck-app pluck-comm','news m12 clrd clr-l p5t', 'flt-r' ]})]
feeds = [
(u'News', u'http://www.metro.co.uk/rss/news/'), (u'Money', u'http://www.metro.co.uk/rss/money/'), (u'Sport', u'http://www.metro.co.uk/rss/sport/'), (u'Film', u'http://www.metro.co.uk/rss/metrolife/film/'), (u'Music', u'http://www.metro.co.uk/rss/metrolife/music/'), (u'TV', u'http://www.metro.co.uk/rss/tv/'), (u'Showbiz', u'http://www.metro.co.uk/rss/showbiz/'), (u'Weird News', u'http://www.metro.co.uk/rss/weird/'), (u'Travel', u'http://www.metro.co.uk/rss/travel/'), (u'Lifestyle', u'http://www.metro.co.uk/rss/lifestyle/'), (u'Books', u'http://www.metro.co.uk/rss/lifestyle/books/'), (u'Food', u'http://www.metro.co.uk/rss/lifestyle/restaurants/')]

71
recipes/natgeo.recipe Normal file
View File

@ -0,0 +1,71 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2011, gagsays <gagsays at gmail dot com>'
'''
nationalgeographic.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class NatGeo(BasicNewsRecipe):
title = u'National Geographic'
description = 'Daily news articles from The National Geographic'
language = 'en'
oldest_article = 20
max_articles_per_feed = 25
encoding = 'utf8'
publisher = 'nationalgeographic.com'
category = 'science, nat geo'
__author__ = 'gagsays'
masthead_url = 'http://s.ngeo.com/wpf/sites/themes/global/i/presentation/ng_logo_small.png'
description = 'Inspiring people to care about the planet since 1888'
timefmt = ' [%a, %d %b, %Y]'
no_stylesheets = True
use_embedded_content = False
extra_css = '''
body {color: #000000;font-size: medium;}
h1 {color: #222222; font-size: large; font-weight:lighter; text-decoration:none; text-align: center;font-family:Georgia,Times New Roman,Times,serif;}
h2 {color: #454545; font-size: small; font-weight:lighter; text-decoration:none; text-align: justify; font-style:italic;font-family :Georgia,Times New Roman,Times,serif;}
h3 {color: #555555; font-size: small; font-style:italic; margin-top: 10px;}
img{margin-bottom: 0.25em;display:block;margin-left: auto;margin-right: auto;}
a:link,a,.a,href {text-decoration: none;color: #000000;}
.caption{color: #000000;font-size: xx-small;text-align: justify;font-weight:normal;}
.credit{color: #555555;font-size: xx-small;text-align: left;font-weight:lighter;}
p.author,p.publication{color: #000000;font-size: xx-small;text-align: left;display:inline;}
p.publication_time{color: #000000;font-size: xx-small;text-align: right;text-decoration: underline;}
p {margin-bottom: 0;}
p + p {text-indent: 1.5em;margin-top: 0;}
.hidden{display:none;}
#page_head{text-transform:uppercase;}
'''
def parse_feeds (self):
feeds = BasicNewsRecipe.parse_feeds(self)
for feed in feeds:
for article in feed.articles[:]:
if 'Presented' in article.title or 'Pictures' in article.title:
feed.articles.remove(article)
return feeds
def preprocess_html(self, soup):
for alink in soup.findAll('a'):
if alink.string is not None:
tstr = alink.string
alink.replaceWith(tstr)
return soup
remove_tags_before = dict(id='page_head')
keep_only_tags = [
dict(name='div',attrs={'id':['page_head','content_mainA']})
]
remove_tags_after = [
dict(name='div',attrs={'class':['article_text','promo_collection']})
]
remove_tags = [
dict(name='div', attrs={'class':['aside','primary full_width']})
,dict(name='div', attrs={'id':['header_search','navigation_mainB_wrap']})
]
feeds = [
(u'Daily News', u'http://feeds.nationalgeographic.com/ng/News/News_Main')
]

View File

@ -0,0 +1,25 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1305567197(BasicNewsRecipe):
title = u'National Geographic (DE)'
__author__ = 'Anonymous'
language = 'de'
oldest_article = 7
max_articles_per_feed = 1000
no_stylesheets = True
use_embedded_content = False
remove_javascript = True
cover_url = 'http://www.nationalgeographic.de/images/national-geographic-logo.jpg'
keep_only_tags = [
dict(name='div', attrs={'class':['contentbox_no_top_border']}) ]
remove_tags = [
dict(name='div', attrs={'class':'related'}),
dict(name='li', attrs={'class':'first'}),
dict(name='div', attrs={'class':'extrasbox_inner'}),
]
feeds = [ (u'National Geographic', u'http://feeds.nationalgeographic.de/ng-neueste-artikel'),
]

View File

@ -11,6 +11,20 @@ class Newsweek(BasicNewsRecipe):
BASE_URL = 'http://www.newsweek.com'
topics = {
'Culture' : '/tag/culture.html',
'Business' : '/tag/business.html',
'Society' : '/tag/society.html',
'Science' : '/tag/science.html',
'Education' : '/tag/education.html',
'Politics' : '/tag/politics.html',
'Health' : '/tag/health.html',
'World' : '/tag/world.html',
'Nation' : '/tag/nation.html',
'Technology' : '/tag/technology.html',
'Game Changers' : '/tag/game-changers.html',
}
keep_only_tags = dict(name='article', attrs={'class':'article-text'})
remove_tags = [dict(attrs={'data-dartad':True})]
remove_attributes = ['property']
@ -21,14 +35,10 @@ class Newsweek(BasicNewsRecipe):
return soup
def newsweek_sections(self):
return [
('Nation', 'http://www.newsweek.com/tag/nation.html'),
('Society', 'http://www.newsweek.com/tag/society.html'),
('Culture', 'http://www.newsweek.com/tag/culture.html'),
('World', 'http://www.newsweek.com/tag/world.html'),
('Politics', 'http://www.newsweek.com/tag/politics.html'),
('Business', 'http://www.newsweek.com/tag/business.html'),
]
for topic_name, topic_url in self.topics.iteritems():
yield (topic_name,
self.BASE_URL+topic_url)
def newsweek_parse_section_page(self, soup):
for article in soup.findAll('article', about=True,
@ -59,7 +69,11 @@ class Newsweek(BasicNewsRecipe):
for section, shref in self.newsweek_sections():
self.log('Processing section', section, shref)
articles = []
soups = [self.index_to_soup(shref)]
try:
soups = [self.index_to_soup(shref)]
except:
self.log.warn('Section %s not found, skipping'%section)
continue
na = soups[0].find('a', rel='next')
if na:
soups.append(self.index_to_soup(self.BASE_URL+na['href']))

29
recipes/ngz.recipe Normal file
View File

@ -0,0 +1,29 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
title = u'NGZ-online'
__author__ = 'schuster'
remove_tags_before = dict(id='bu')
remove_tags_after = dict(id='noblock')
remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool', 'nextArticleLink clearfix', 'liketext']}),
dict(id=['footer', 'toolsRight', 'articleInline', 'navigation', 'archive', 'side_search', 'blog_sidebar', 'side_tool', 'side_index', 'Verlinken', 'vorheriger', 'LESERKOMMENTARE', 'bei facebook', 'bei twitter', 'Schreiben Sie jetzt Ihre Meinung:', 'Thema', 'Ihr Beitrag', 'Ihr Name', 'Ich möchte über weitere Lesermeinungen zu diesem Artikel per E-Mail informiert werden.', 'banneroben', 'bannerrechts', 'inserieren', 'stellen', 'auto', 'immobilien', 'kleinanzeige', 'tiere', 'ferienwohnung', 'NGZ Card', 'Mediengruppe RP', 'Werben', 'Newsletter', 'Wetter', 'RSS', 'Abo', 'Anzeigen', 'Redaktion', 'Schulprojekte', 'Gast', 'Mein NGZ', 'Nachrichten', 'Sport', 'Wirtschaft', 'Stadt-Infos', 'Bilderserien', 'Bookmarken', 'del.icio.us', 'Mister Wong', 'YiGG', 'Webnews', 'Shortnews', 'Twitter', 'Newsider', 'Facebook', 'StudiVZ/MeinVZ', 'Versenden', 'Drucken']),
dict(name=['script', 'noscript', 'style'])]
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
language = 'de'
remove_javascript = True
cover_url = 'http://www.rhein-kreis-neuss-macht-sport.de/sport/includes/bilder/ngz_logo.jpg'
def print_version(self, url):
return url + '?ot=de.circit.rpo.PopupPageLayout.ot'
feeds = [
(u'Grevenbroich', u'http://www.ngz-online.de/app/feed/rss/grevenbroich'),
(u'Kreis Neuss', u'http://www.ngz-online.de/app/feed/rss/rheinkreisneuss'),
(u'Dormagen', u'http://www.ngz-online.de/app/feed/rss/dormagen'),
(u'J\xfcchen', u'http://www.ngz-online.de/app/feed/rss/juechen'),
(u'Rommerskirchen', u'http://www.ngz-online.de/app/feed/rss/rommerskirchen')
]

View File

@ -0,0 +1,64 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
coverpage = None
class ObservatorulCultural(BasicNewsRecipe):
title = u'Observatorul cultural'
__author__ = 'song2' #prelucrat dupa un script de http://www.thenowhereman.com
encoding = 'utf-8'
language = 'ro'
publication_type = 'magazine'
description = 'Spiritul critic in acţiune\n'
no_stylesheets = True
remove_javascript = True
masthead_url='http://www.observatorcultural.ro/userfiles/article/sigla%20Observator%20cultural_02231058.JPG'
keep_only_tags = [
dict(name='div', attrs={'class':'detaliuArticol'})]
remove_tags = [dict(name='div', attrs={'class':'comentariiArticol'}),
dict(name='div', attrs={'class':'postComment'}),
dict(name='div', attrs={'class':'utileArticol'}),
dict(name='p', attrs={'class':'butonComenteaza'}),
dict(name='h5'),
dict(name='div', attrs={'style':'margin-top: 0px; padding-top: 0px;'})
]
def parse_index(self):
soup = self.index_to_soup('http://www.observatorcultural.ro/Arhiva*-archive.html')
issueTag = soup.find('a', href=re.compile("observatorcultural.ro\/Numarul"))
issueURL = issueTag['href']
print issueURL;
issueSoup = self.index_to_soup(issueURL)
feeds = []
stories = []
for categorie in issueSoup.findAll('dl',attrs={'class':'continutArhive'}):
categ=self.tag_to_string(categorie.find('dt'))
for story in categorie.findAll('dd'):
title=[]
for bucatele in story.findAll('a'):
title.append(bucatele)
if len(title)==1: #daca articolul nu are autor
stories.append({
'title' : self.tag_to_string(title[0]),
'url' : title[0]['href'],
'date' : '',
'author' : ''})
else: # daca articolul are autor len(title)=2
stories.append({
'title' : self.tag_to_string(title[1]),
'url' :title[1]['href'],
'date' : '',
'author' : self.tag_to_string(title[0])})
print(self.tag_to_string(title[0]))
if 'Editorial' in categ:
global coverpage
coverpage=title[1]['href'] # am luat link-ul spre editorial
feeds.append((categ,stories))
stories = []
print feeds
return feeds
#procedura de luat coperta
def get_cover_url(self):
soup = self.index_to_soup(coverpage)
link_item = soup.find('a',attrs={'rel':'lightbox'}) # caut imaginea textului
a=''
cover_url = a.join(link_item.img['src'].split('_details_'))
return cover_url

View File

@ -0,0 +1,35 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe(BasicNewsRecipe):
title = u'Polizeipresse - Deutschland'
__author__ = 'schuster'
description = 'Tagesaktuelle "Polizeiberichte" aus ganz Deutschland (bis auf Ortsebene).' 'Um deinen Ort/Stadt/Kreis usw. einzubinden, gehe auf "http://www.presseportal.de/polizeipresse/" und suche im oberen "Suchfeld" nach dem Namen.' 'Oberhalb der Suchergebnisse (Folgen:) auf den üblichen link zu den RSS-Feeds klicken und den RSS-link im Rezept unter "feeds" eintragen wie üblich.' 'Die Auswahl von Orten kann vereinfacht werden wenn man den Suchbegriff wie folgt eingibt:' '"Stadt-Ort".'
oldest_article = 21
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
language = 'de'
remove_javascript = True
masthead_url = 'http://www.alt-heliservice.de/images/34_BPOL_Logo_4C_g_schutzbereich.jpg'
cover_url = 'http://berlinstadtservice.de/buerger/Bundespolizei-Logo.png'
remove_tags = [
dict(name='div', attrs={'id':'logo'}),
dict(name='div', attrs={'id':'origin'}),
dict(name='pre', attrs={'class':'xml_contact'})]
def print_version(self,url):
segments = url.split('/')
printURL = 'http://www.presseportal.de/print.htx?nr=' + '/'.join(segments[5:6]) + '&type=polizei'
return printURL
feeds = [(u'Frimmerdorf', u'http://www.presseportal.de/rss/rss2_vts.htx?q=Grevenbroich-frimmersdorf&w=public_service'),
(u'Neurath', u'http://www.presseportal.de/rss/rss2_vts.htx?q=Grevenbroich-neurath&w=public_service'),
(u'Gustorf', u'http://www.presseportal.de/rss/rss2_vts.htx?q=Grevenbroich-gustorf&w=public_service'),
(u'Neuenhausen', u'http://www.presseportal.de/rss/rss2_vts.htx?q=Grevenbroich-neuenhausen&w=public_service'),
(u'Wevelinghoven', u'http://www.presseportal.de/rss/rss2_vts.htx?q=Grevenbroich-Wevelinghoven&w=public_service'),
(u'Grevenbroich ges.', u'http://www.presseportal.de/rss/rss2_vts.htx?q=grevenbroich&w=public_service'),
(u'Kreis Neuss ges.', u'http://www.presseportal.de/rss/rss2_vts.htx?q=Rhein-Kreis+Neuss&w=public_service'),
]

22
recipes/pro_physik.recipe Normal file
View File

@ -0,0 +1,22 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
title = u'Pro Physik'
__author__ = 'schuster'
oldest_article = 4
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
language = 'de'
remove_javascript = True
cover_url = 'http://www.pro-physik.de/Phy/images/site/prophysik_logo1.jpg'
def print_version(self, url):
return url.replace('leadArticle.do', 'print.do')
feeds = [(u'Hightech', u'http://www.pro-physik.de/Phy/hightechfeed.xml'),
(u'Forschung', u'http://www.pro-physik.de/Phy/forschungfeed.xml'),
(u'Magazin', u'http://www.pro-physik.de/Phy/magazinfeed.xml')]

View File

@ -0,0 +1,55 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class AdvancedUserRecipe(BasicNewsRecipe):
title = u'RP-online'
__author__ = 'schuster'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
language = 'de'
remove_javascript = True
masthead_url = 'http://www.die-zeitungen.de/uploads/pics/LOGO_RP_ONLINE_01.jpg'
cover_url = 'http://www.manroland.com/com/pressinfo_images/com/RheinischePost_Logo_300dpi.jpg'
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h4{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
img {min-width:300px; max-width:600px; min-height:300px; max-height:800px}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
remove_tags_before = dict(id='article_content')
remove_tags_after = dict(id='article_content')
remove_tags = [dict(attrs={'class':['goodies', 'left', 'right', 'clear-all', 'teaser anzeigenwerbung', 'lesermeinung', 'goodiebox', 'goodiebox 1', 'goodiebox 2', 'goodiebox 3', 'boxframe', 'link']}),
dict(id=['click_Fotos_link']),
dict(name=['script', 'noscript', 'style', '_top', 'click_Fotos_link'])]
feeds = [ (u'Top-News', u'http://www.ngz-online.de/app/feed/rss/topnews'),
(u'Politik', u'http://www.ngz-online.de/app/feed/rss/politik'),
(u'Wirtschaft', u'http://www.ngz-online.de/app/feed/rss/wirtschaft'),
(u'Panorama', u'http://www.ngz-online.de/app/feed/rss/panorama'),
(u'Sport', u'http://www.ngz-online.de/app/feed/rss/sport'),
(u'Tour de France', u'http://www.ngz-online.de/app/feed/rss/tourdefrance'),
(u'Fußball', u'http://www.ngz-online.de/app/feed/rss/fussball'),
(u'Fußball BuLi', u'http://www.ngz-online.de/app/feed/rss/bundesliga'),
(u'Formel 1', u'http://www.ngz-online.de/app/feed/rss/formel1'),
(u'US-Sport', u'http://www.ngz-online.de/app/feed/rss/us-sports'),
(u'Boxen', u'http://www.ngz-online.de/app/feed/rss/boxen'),
(u'Eishockey', u'http://www.ngz-online.de/app/feed/rss/eishockey'),
(u'Basketball', u'http://www.ngz-online.de/app/feed/rss/basketball'),
(u'Handball', u'http://www.ngz-online.de/app/feed/rss/handball'),
(u'Motorsport', u'http://www.ngz-online.de/app/feed/rss/motorsport'),
(u'Tennis', u'http://www.ngz-online.de/app/feed/rss/tennis'),
(u'Radsport', u'http://www.ngz-online.de/app/feed/rss/radsport'),
(u'Kultur', u'http://www.ngz-online.de/app/feed/rss/kultur'),
(u'Gesellschaft', u'http://www.ngz-online.de/app/feed/rss/gesellschaft'),
(u'Wissenschaft', u'http://www.ngz-online.de/app/feed/rss/wissen'),
(u'Gesundheit', u'http://www.ngz-online.de/app/feed/rss/gesundheit'),
(u'Digitale Welt', u'http://www.ngz-online.de/app/feed/rss/digitale'),
(u'Auto & Mobil', u'http://www.ngz-online.de/app/feed/rss/auto'),
(u'Reise & Welt', u'http://www.ngz-online.de/app/feed/rss/reise'),
(u'Beruf & Karriere', u'http://www.ngz-online.de/app/feed/rss/beruf'),
(u'Herzrasen', u'http://www.ngz-online.de/app/feed/rss/herzrasen'),
(u'About a Boy', u'http://www.ngz-online.de/app/feed/rss/about_a_boy'),
]

View File

@ -1,52 +0,0 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Mori'
__version__ = 'v. 0.1'
'''
www.runa.pl/blog
'''
from calibre.web.feeds.news import BasicNewsRecipe
import re
class FantazmatyRecipe(BasicNewsRecipe):
__author__ = 'Mori'
language = 'pl'
title = u'Fantazmaty'
publisher = u'Agencja Wydawnicza Runa'
description = u'Blog Agencji Wydawniczej Runa'
no_stylesheets = True
remove_javascript = True
encoding = 'utf-8'
oldest_article = 100
max_articles_per_feed = 100
extra_css = '''
img{float: left; padding-right: 10px; padding-bottom: 5px;}
'''
feeds = [
(u'Fantazmaty', u'http://www.runa.pl/blog/rss.xml')
]
remove_tags = [
dict(name = 'div', attrs = {'class' : 'path'}),
dict(name = 'div', attrs = {'class' : 'drdot'}),
dict(name = 'div', attrs = {'class' : 'picture'})
]
remove_tags_after = [
dict(name = 'div', attrs = {'class' : 'content'})
]
preprocess_regexps = [
(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
[
(r'<body>.*?<div id="primary"', lambda match: '<body><div id="primary"'),
(r'<!--.*?-->', lambda match: '')
]
]

28
recipes/spektrum.recipe Normal file
View File

@ -0,0 +1,28 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
title = u'Spektrum (der Wissenschaft)'
__author__ = 'schuster'
oldest_article = 7
max_articles_per_feed = 100
language = 'de'
cover_url = 'http://upload.wikimedia.org/wikipedia/de/3/3b/Spektrum_der_Wissenschaft_Logo.svg'
remove_tags = [dict(attrs={'class':['hauptnaviPkt gainlayout', 'hauptnaviButton', 'suchButton', 'suchbegriffKasten', 'loginButton', 'subnavigation', 'artikelInfoLeiste gainlayout', 'artikelTools', 'nurLetzteSeite', 'link', 'boxUnterArtikel', 'leserbriefeBlock', 'boxTitel', 'boxInhalt', 'sehrklein', 'boxabstand', 'werbeboxinhalt', 'rbabstand', 'bildlinks', 'rechtebox', 'denkmalbox', 'denkmalfrage']}),
dict(id=['pflip', 'verlagsleiste', 'bereich', 'bannerVertikal', 'headerLogoLink', 'kopf', 'topNavi', 'headerSchnellsuche', 'headerSchnellsucheWarten', 'navigation', 'navigationL', 'navigationR', 'inhalt', 'rechtespalte', 'sdwboxenshop', 'shopboxen', 'fuss']),
dict(name=['naservice'])]
def print_version(self,url):
newurl = url.replace('artikel/', 'sixcms/detail.php?id=')
return newurl + '&_druckversion=1'
feeds = [(u'Spektrum der Wissenschaft', u'http://www.spektrum.de/artikel/982623'),
(u'SpektrumDirekt', u'http://www.spektrumdirekt.de/artikel/996406'),
(u'Sterne und Weltraum', u'http://www.astronomie-heute.de/artikel/865248'),
(u'Gehirn & Geist', u'http://www.gehirn-und-geist.de/artikel/982626'),
(u'epoc', u'http://www.epoc.de/artikel/982625')
]
filter_regexps = [r'ads\.doubleclick\.net']

View File

@ -0,0 +1,24 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
title = u'Technology Review'
__author__ = 'schuster'
remove_tags_before = dict(id='keywords')
remove_tags_after = dict(id='kommentar')
remove_tags = [dict(attrs={'class':['navi_oben_pvg', 'navi_oben_tarifr', 'navi_oben_itm', 'navi_oben_eve', 'navi_oben_whi', 'navi_oben_abo', 'navi_oben_shop', 'navi_top_logo', 'navi_top_abschnitt', 'first']}),
dict(id=['footer', 'toolsRight', 'articleInline', 'navigation', 'archive', 'side_search', 'blog_sidebar', 'side_tool', 'side_index']),
dict(name=['script', 'noscript', 'style'])]
oldest_article = 4
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
language = 'de'
remove_javascript = True
def print_version(self, url):
return url + '?view=print'
feeds = [
(u'Technik News', u'http://www.heise.de/tr/news-atom.xml') ]

View File

@ -49,6 +49,7 @@ class TelegraphUK(BasicNewsRecipe):
(u'UK News' , u'http://www.telegraph.co.uk/news/uknews/rss' )
,(u'World News' , u'http://www.telegraph.co.uk/news/worldnews/rss' )
,(u'Politics' , u'http://www.telegraph.co.uk/news/newstopics/politics/rss' )
,(u'Finance' , u'http://www.telegraph.co.uk/finance/rss' )
,(u'Technology News', u'http://www.telegraph.co.uk/scienceandtechnology/technology/technologynews/rss' )
,(u'UK News' , u'http://www.telegraph.co.uk/scienceandtechnology/technology/technologyreviews/rss')
,(u'Science News' , u'http://www.telegraph.co.uk/scienceandtechnology/science/sciencenews/rss' )

View File

@ -1,5 +1,5 @@
__license__ = 'GPL v3'
__copyright__ = '2008 - 2010, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008 - 2011, Darko Miletic <darko.miletic at gmail.com>'
'''
thenation.com
'''
@ -16,10 +16,17 @@ class Thenation(BasicNewsRecipe):
max_articles_per_feed = 100
no_stylesheets = True
language = 'en'
use_embedded_content = False
delay = 1
masthead_url = 'http://www.thenation.com/sites/default/themes/thenation/images/logo-main.gif'
exra_css = ' body{font-family: Arial,Helvetica,sans-serif;} .print-created{font-size: small;} .caption{display: block; font-size: x-small;} '
use_embedded_content = False
delay = 1
masthead_url = 'http://www.thenation.com/sites/default/themes/thenation/images/logo-main.gif'
login_url = 'http://www.thenation.com/user?destination=%3Cfront%3E'
publication_type = 'magazine'
needs_subscription = 'optional'
exra_css = """
body{font-family: Arial,Helvetica,sans-serif;}
.print-created{font-size: small;}
.caption{display: block; font-size: x-small;}
"""
conversion_options = {
'comment' : description
@ -28,13 +35,30 @@ class Thenation(BasicNewsRecipe):
, 'language' : language
}
keep_only_tags = [ dict(attrs={'class':['print-title','print-created','print-content','print-links']}) ]
remove_tags = [dict(name='link')]
keep_only_tags = [dict(attrs={'class':['print-title','print-created','print-content','print-links']})]
remove_tags = [dict(name=['link','iframe','base','meta','object','embed'])]
remove_attributes = ['lang']
feeds = [(u"Editor's Picks", u'http://www.thenation.com/rss/editors_picks')]
feeds = [(u"Articles", u'http://www.thenation.com/rss/articles')]
def print_version(self, url):
return url.replace('.thenation.com/','.thenation.com/print/')
def preprocess_html(self, soup):
return self.adeify_images(soup)
def get_browser(self):
br = BasicNewsRecipe.get_browser()
br.open('http://www.thenation.com/')
if self.username is not None and self.password is not None:
br.open(self.login_url)
br.select_form(nr=1)
br['name'] = self.username
br['pass'] = self.password
br.submit()
return br
def get_cover_url(self):
soup = self.index_to_soup('http://www.thenation.com/issue/')
item = soup.find('div',attrs={'id':'cover-wrapper'})
if item:
return item.img['src']
return None

View File

@ -10,8 +10,8 @@ import re
from calibre.web.feeds.news import BasicNewsRecipe
class Time(BasicNewsRecipe):
recipe_disabled = ('This recipe has been disabled as TIME no longer'
' publish complete articles on the web.')
#recipe_disabled = ('This recipe has been disabled as TIME no longer'
# ' publish complete articles on the web.')
title = u'Time'
__author__ = 'Kovid Goyal and Sujata Raman'
description = 'Weekly magazine'

View File

@ -14,6 +14,7 @@ class UnitedDaily(BasicNewsRecipe):
(u'生活', u'http://udn.com/udnrss/life.xml'),
(u'綜合', u'http://udn.com/udnrss/education.xml'),
(u'意見評論', u'http://udn.com/udnrss/opinion.xml'),
(u'校園博覽會', u'http://mag.udn.com/udnrss/campus_rss.xml'),
(u'大台北', u'http://udn.com/udnrss/local_taipei.xml'),
(u'桃竹苗', u'http://udn.com/udnrss/local_tyhcml.xml'),
(u'中彰投', u'http://udn.com/udnrss/local_tcchnt.xml'),
@ -21,15 +22,21 @@ class UnitedDaily(BasicNewsRecipe):
(u'高屏離島', u'http://udn.com/udnrss/local_ksptisland.xml'),
(u'基宜花東', u'http://udn.com/udnrss/local_klilhltt.xml'),
(u'台灣百寶鄉', u'http://udn.com/udnrss/local_oddlyenough.xml'),
(u'台灣人物', u'http://mag.udn.com/udnrss/people_rss.xml'),
(u'兩岸要聞', u'http://udn.com/udnrss/mainland.xml'),
(u'國際焦點', u'http://udn.com/udnrss/international.xml'),
(u'台商經貿', u'http://udn.com/udnrss/financechina.xml'),
(u'國際財經', u'http://udn.com/udnrss/financeworld.xml'),
(u'全球觀察', u'http://mag.udn.com/udnrss/world_rss.xml'),
(u'財經焦點', u'http://udn.com/udnrss/financesfocus.xml'),
(u'股市要聞', u'http://udn.com/udnrss/stock.xml'),
(u'股市快訊', u'http://udn.com/udnrss/stklatest.xml'),
(u'稅務法務', u'http://udn.com/udnrss/tax.xml'),
(u'房市情報', u'http://udn.com/udnrss/houses.xml'),
(u'個人理財', u'http://mag.udn.com/udnrss/wealth_rss.xml'),
(u'研究報告', u'http://mag.udn.com/udnrss/report_rss.xml'),
(u'基金', u'http://mag.udn.com/udnrss/fund_rss.xml'),
(u'理財會客室', u'http://mag.udn.com/udnrss/m_forum_rss.xml'),
(u'棒球', u'http://udn.com/udnrss/baseball.xml'),
(u'籃球', u'http://udn.com/udnrss/basketball.xml'),
(u'體壇動態', u'http://udn.com/udnrss/sportsfocus.xml'),
@ -40,19 +47,24 @@ class UnitedDaily(BasicNewsRecipe):
(u'電影世界', u'http://udn.com/udnrss/movie.xml'),
(u'流行音樂', u'http://udn.com/udnrss/music.xml'),
(u'觀點專題', u'http://udn.com/udnrss/starssubject.xml'),
(u'消費流行', u'http://mag.udn.com/udnrss/happylife_rss.xml'),
(u'食樂指南', u'http://udn.com/udnrss/food.xml'),
(u'數位資訊', u'http://mag.udn.com/udnrss/digital_rss.xml'),
(u'折扣好康', u'http://udn.com/udnrss/shopping.xml'),
(u'發燒車訊', u'http://mag.udn.com/udnrss/car_rss.xml'),
(u'醫藥新聞', u'http://udn.com/udnrss/health.xml'),
(u'家婦繽紛', u'http://udn.com/udnrss/benfen.xml'),
(u'談星論命', u'http://udn.com/udnrss/astrology.xml'),
(u'文化副刊', u'http://udn.com/udnrss/reading.xml'),
(u'旅遊休閒', u'http://travel.udn.com/udnrss/travel_rss.xml'),
(u'健康醫藥', u'http://mag.udn.com/udnrss/life_rss.xml'),
]
extra_css = '''div[id='story_title'] {font-size:200%; font-weight:bold;}'''
extra_css = '''div[id='story_title'] {font-size:200%; font-weight:bold;} td[class='story_title'] {font-size:200%; font-weight:bold;} td[class='story_title'] td[class='story_title']>div {font-size:200%; font-weight:bold;}'''
__author__ = 'Eddie Lau'
__version__ = '1.0'
language = 'zh'
__version__ = '1.1'
language = 'zh-TW'
publisher = 'United Daily News Group'
description = 'United Daily (Taiwan)'
category = 'News, Chinese, Taiwan'
@ -63,5 +75,12 @@ class UnitedDaily(BasicNewsRecipe):
conversion_options = {'linearize_tables':True}
masthead_url = 'http://udn.com/NEWS/2004/images/logo_udn.gif'
cover_url = 'http://udn.com/NEWS/2004/images/logo_udn.gif'
keep_only_tags = [dict(name='div', attrs={'id':['story_title','story_author', 'story']})]
keep_only_tags = [dict(name='td', attrs={'class':['story_title']}),
dict(name='div', attrs={'id':['story_title']}),
dict(name='td', attrs={'class':['story_author']}),
dict(name='div', attrs={'id':['story_author']}),
dict(name='td', attrs={'class':['story']}),
dict(name='div', attrs={'id':['story']}),
]
remove_tags = [dict(name='div', attrs={'id':['mvouter']})]

64
recipes/version2.recipe Normal file
View File

@ -0,0 +1,64 @@
import re
__license__ = 'GPL v3'
__copyright__ = '2011, Rasmus Lauritsen <rasmus at lauritsen.info>'
'''
version2.dk
'''
from calibre.web.feeds.news import BasicNewsRecipe
class version2(BasicNewsRecipe):
title = 'Version2.dk'
__author__ = 'Rasmus Lauritsen'
description = 'IT News'
publisher = 'version2.dk'
category = 'news, IT, hardware, software, Denmark'
oldest_article = 14
max_articles_per_feed = 50
no_stylesheets = True
remove_empty_feeds = True
use_embedded_content = False
encoding = 'iso-8859-1'
language = 'da'
extra_css = """
body {font-family: "Verdana",Times,serif}
.articleauthor{color: #9F9F9F;
font-family: Arial, sans-serif;
font-size: small;
text-transform: uppercase}
.rubric,.dd,h6#credit{color: #CD0021;
font-family: Arial, sans-serif;
font-size: small;
text-transform: uppercase}
.descender:first-letter{display: inline; font-size: xx-large; font-weight: bold}
.dd,h6#credit{color: gray}
.c{display: block}
.caption,h2#articleintro{font-style: italic}
.caption{font-size: small}
"""
preprocess_regexps = [ (re.compile(r'</?a[^>]*>'),lambda match: ''),
(re.compile(r'<span[^>]*article-link-id.*?<br\s*\/?><br\s*\/?>'), lambda match: '')]
keep_only_tags = [dict(name='div', attrs={'class':'article'})]
remove_tags = [
dict(name='p',attrs={'class':'meta links'}),
dict(name='div',attrs={'class':'float-right'}),
dict(name='span',attrs={'class':'article-link-id'})
]
feeds = [
(u'Seneste nyheder' , u'http://www.version2.dk/feeds/nyheder')
,(u'Forretningssoftware' , u'http://www.version2.dk/feeds/forretningssoftware')
,(u'Internet & styresystemer' , u'http://www.version2.dk/feeds/styresystemer')
,(u'It-arkitektur' , u'http://www.version2.dk/feeds/it-arkitektur')
,(u'It-styring & outsourcing' , u'http://www.version2.dk/feeds/it-styring')
,(u'Job & karriere' , u'http://www.version2.dk/feeds/karriere')
,(u'Mobil it & tele' , u'http://www.version2.dk/feeds/tele')
,(u'Server/storage & netværk' , u'http://www.version2.dk/feeds/server-storage')
,(u'Sikkerhed' , u'http://www.version2.dk/feeds/sikkerhed')
,(u'Softwareudvikling' , u'http://www.version2.dk/feeds/softwareudvikling')
]

View File

@ -1,64 +1,75 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.washingtonpost.com
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class TheWashingtonPost(BasicNewsRecipe):
title = 'The Washington Post'
__author__ = 'Darko Miletic'
description = 'Leading source for news, video and opinion on politics, business, world and national news, science, travel, entertainment and more. Our local coverage includes reporting on education, crime, weather, traffic, real estate, jobs and cars for DC, Maryland and Virginia. Offering award-winning opinion writing, entertainment information and restaurant reviews.'
publisher = 'The Washington Post Company'
category = 'news, politics, USA'
oldest_article = 2
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'utf8'
delay = 1
use_embedded_content = False
language = 'en'
remove_empty_feeds = True
publication_type = 'newspaper'
masthead_url = 'http://www.washingtonpost.com/rw/sites/twpweb/img/logos/twp_logo_300.gif'
cover_url = strftime('http://www.washingtonpost.com/rw/WashingtonPost/Content/Epaper/%Y-%m-%d/Ax1.pdf')
extra_css = """
body{font-family: Georgia,serif }
"""
class WashingtonPost(BasicNewsRecipe):
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
title = 'Washington Post'
description = 'US political news'
__author__ = 'Kovid Goyal'
use_embedded_content = False
max_articles_per_feed = 20
language = 'en'
encoding = 'utf-8'
keep_only_tags = [dict(attrs={'id':['content','entryhead','entrytext']})]
remove_tags = [
dict(name=['meta','link','iframe','base'])
,dict(attrs={'id':'multimedia-leaf-page'})
]
remove_attributes= ['lang','property','epochtime','datetitle','pagetype','contenttype','comparetime']
remove_javascript = True
no_stylesheets = True
feeds = [
('Politics', 'http://www.washingtonpost.com/rss/politics'),
('Nation', 'http://www.washingtonpost.com/rss/national'),
('World', 'http://www.washingtonpost.com/rss/world'),
('Business', 'http://www.washingtonpost.com/rss/business'),
('Lifestyle', 'http://www.washingtonpost.com/rss/lifestyle'),
('Sports', 'http://www.washingtonpost.com/rss/sports'),
('Redskins', 'http://www.washingtonpost.com/rss/sports/redskins'),
('Opinions', 'http://www.washingtonpost.com/rss/opinions'),
('Entertainment', 'http://www.washingtonpost.com/rss/entertainment'),
('Local', 'http://www.washingtonpost.com/rss/local'),
('Investigations',
'http://www.washingtonpost.com/rss/investigations'),
]
remove_tags = [
{'class':lambda x: x and 'article-toolbar' in x},
{'class':lambda x: x and 'quick-comments' in x},
{'class':lambda x: x and 'tweet' in x},
{'class':lambda x: x and 'article-related' in x},
{'class':lambda x: x and 'hidden' in x.split()},
{'class':lambda x: x and 'also-read' in x.split()},
{'class':lambda x: x and 'partners-content' in x.split()},
{'class':['module share', 'module ads', 'comment-vars', 'hidden',
'share-icons-wrap', 'comments', 'flipper']},
{'id':['right-rail', 'save-and-share']},
{'width':'1', 'height':'1'},
]
keep_only_tags = dict(id=['content', 'article'])
def get_article_url(self, *args):
ans = BasicNewsRecipe.get_article_url(self, *args)
ans = ans.rpartition('?')[0]
if ans.endswith('_video.html'):
return None
if 'ads.pheedo.com' in ans:
return None
#if not ans.endswith('_blog.html'):
# return None
return ans
(u'World' , u'http://feeds.washingtonpost.com/rss/world' )
,(u'National' , u'http://feeds.washingtonpost.com/rss/national' )
,(u'White House' , u'http://feeds.washingtonpost.com/rss/politics/whitehouse' )
,(u'Business' , u'http://feeds.washingtonpost.com/rss/business' )
,(u'Opinions' , u'http://feeds.washingtonpost.com/rss/opinions' )
,(u'Investigations' , u'http://feeds.washingtonpost.com/rss/investigations' )
,(u'Local' , u'http://feeds.washingtonpost.com/rss/local' )
,(u'Entertainment' , u'http://feeds.washingtonpost.com/rss/entertainment' )
,(u'Sports' , u'http://feeds.washingtonpost.com/rss/sports' )
,(u'Redskins' , u'http://feeds.washingtonpost.com/rss/sports/redskins' )
,(u'Special Reports', u'http://feeds.washingtonpost.com/rss/national/special-reports')
]
def print_version(self, url):
return url.replace('_story.html', '_singlePage.html')
if '_story.html' in url:
return url.replace('_story.html','_print.html')
return url
def get_article_url(self, article):
link = BasicNewsRecipe.get_article_url(self,article)
if not 'washingtonpost.com' in link:
self.log('Skipping adds:', link)
return None
for it in ['_video.html','_gallery.html','_links.html']:
if it in link:
self.log('Skipping non-article:', link)
return None
return link

View File

@ -82,7 +82,7 @@ class ZAOBAO(BasicNewsRecipe):
return soup
def parse_feeds(self):
self.log_debug(_('ZAOBAO overrided parse_feeds()'))
self.log(_('ZAOBAO overrided parse_feeds()'))
parsed_feeds = BasicNewsRecipe.parse_feeds(self)
for id, obj in enumerate(self.INDEXES):
@ -99,7 +99,7 @@ class ZAOBAO(BasicNewsRecipe):
a_title = self.tag_to_string(a)
date = ''
description = ''
self.log_debug(_('adding %s at %s')%(a_title,a_url))
self.log(_('adding %s at %s')%(a_title,a_url))
articles.append({
'title':a_title,
'date':date,
@ -110,23 +110,23 @@ class ZAOBAO(BasicNewsRecipe):
pfeeds = feeds_from_index([(title, articles)], oldest_article=self.oldest_article,
max_articles_per_feed=self.max_articles_per_feed)
self.log_debug(_('adding %s to feed')%(title))
self.log(_('adding %s to feed')%(title))
for feed in pfeeds:
self.log_debug(_('adding feed: %s')%(feed.title))
self.log(_('adding feed: %s')%(feed.title))
feed.description = self.DESC_SENSE
parsed_feeds.append(feed)
for a, article in enumerate(feed):
self.log_debug(_('added article %s from %s')%(article.title, article.url))
self.log_debug(_('added feed %s')%(feed.title))
self.log(_('added article %s from %s')%(article.title, article.url))
self.log(_('added feed %s')%(feed.title))
for i, feed in enumerate(parsed_feeds):
# workaorund a strange problem: Somethimes the xml encoding is not apllied correctly by parse()
weired_encoding_detected = False
if not isinstance(feed.description, unicode) and self.encoding and feed.description:
self.log_debug(_('Feed %s is not encoded correctly, manually replace it')%(feed.title))
self.log(_('Feed %s is not encoded correctly, manually replace it')%(feed.title))
feed.description = feed.description.decode(self.encoding, 'replace')
elif feed.description.find(self.DESC_SENSE) == -1 and self.encoding and feed.description:
self.log_debug(_('Feed %s is weired encoded, manually redo all')%(feed.title))
self.log(_('Feed %s is weired encoded, manually redo all')%(feed.title))
feed.description = feed.description.encode('cp1252', 'replace').decode(self.encoding, 'replace')
weired_encoding_detected = True
@ -148,7 +148,7 @@ class ZAOBAO(BasicNewsRecipe):
article.text_summary = article.text_summary.encode('cp1252', 'replace').decode(self.encoding, 'replace')
if article.title == "Untitled article":
self.log_debug(_('Removing empty article %s from %s')%(article.title, article.url))
self.log(_('Removing empty article %s from %s')%(article.title, article.url))
# remove the article
feed.articles[a:a+1] = []
return parsed_feeds

View File

@ -37,18 +37,23 @@ series_index_auto_increment = 'next'
# Can be either True or False
authors_completer_append_separator = False
#: Author sort name algorithm
# The algorithm used to copy author to author_sort
# Possible values are:
# invert: use "fn ln" -> "ln, fn" (the default algorithm)
# invert: use "fn ln" -> "ln, fn"
# copy : copy author to author_sort without modification
# comma : use 'copy' if there is a ',' in the name, otherwise use 'invert'
# nocomma : "fn ln" -> "ln fn" (without the comma)
# When this tweak is changed, the author_sort values stored with each author
# must be recomputed by right-clicking on an author in the left-hand tags pane,
# selecting 'manage authors', and pressing 'Recalculate all author sort values'.
# The author name suffixes are words that are ignored when they occur at the
# end of an author name. The case of the suffix is ignored and trailing
# periods are automatically handled.
author_sort_copy_method = 'comma'
author_name_suffixes = ('Jr', 'Sr', 'Inc', 'Ph.D', 'Phd',
'MD', 'M.D', 'I', 'II', 'III', 'IV',
'Junior', 'Senior')
#: Use author sort in Tag Browser
# Set which author field to display in the tags pane (the list of authors,
@ -65,6 +70,15 @@ author_sort_copy_method = 'comma'
# categories_use_field_for_author_name = 'author_sort'
categories_use_field_for_author_name = 'author'
#: Completion sort order: choose when to change from lexicographic to ASCII-like
# Calibre normally uses locale-dependent lexicographic ordering when showing
# completion values. This means that the sort order is correct for the user's
# language. However, this can be slow. Performance is improved by switching to
# ascii ordering. This tweak controls when that switch happens. Set it to zero
# to always use ascii ordering. Set it to something larger than zero to switch
# to ascii ordering for performance reasons.
completion_change_to_ascii_sorting = 2500
#: Control partitioning of Tag Browser
# When partitioning the tags browser, the format of the subcategory label is
# controlled by a template: categories_collapsed_name_template if sorting by
@ -87,7 +101,6 @@ categories_collapsed_name_template = r'{first.sort:shorten(4,,0)} - {last.sort:s
categories_collapsed_rating_template = r'{first.avg_rating:4.2f:ifempty(0)} - {last.avg_rating:4.2f:ifempty(0)}'
categories_collapsed_popularity_template = r'{first.count:d} - {last.count:d}'
#: Specify columns to sort the booklist by on startup
# Provide a set of columns to be sorted on when calibre starts
# The argument is None if saved sort history is to be used
@ -238,17 +251,14 @@ sony_collection_name_template='{value}{category:| (|)}'
# Default: empty (no rules), so no collection attributes are named.
sony_collection_sorting_rules = []
#: Control how tags are applied when copying books to another library
# Set this to True to ensure that tags in 'Tags to add when adding
# a book' are added when copying books to another library
add_new_book_tags_when_importing_books = False
#: Set the maximum number of tags to show per book in the content server
max_content_server_tags_shown=5
#: Set custom metadata fields that the content server will or will not display.
# content_server_will_display is a list of custom fields to be displayed.
# content_server_wont_display is a list of custom fields not to be displayed.
@ -290,7 +300,6 @@ generate_cover_foot_font = None
# Example: doubleclick_on_library_view = 'do_nothing'
doubleclick_on_library_view = 'open_viewer'
#: Language to use when sorting.
# Setting this tweak will force sorting to use the
# collating order for the specified language. This might be useful if you run
@ -345,3 +354,11 @@ send_news_to_device_location = "main"
# work on all operating systems)
server_listen_on = '0.0.0.0'
#: Unified toolbar on OS X
# If you enable this option and restart calibre, the toolbar will be 'unified'
# with the titlebar as is normal for OS X applications. However, doing this has
# various bugs, for instance the minimum width of the toolbar becomes twice
# what it should be and it causes other random bugs on some systems, so turn it
# on at your own risk!
unified_title_toolbar_on_osx = False

View File

@ -1,34 +1,43 @@
{
"contains": "def evaluate(self, formatter, kwargs, mi, locals,\n val, test, value_if_present, value_if_not):\n if re.search(test, val):\n return value_if_present\n else:\n return value_if_not\n",
"and": "def evaluate(self, formatter, kwargs, mi, locals, *args):\n i = 0\n while i < len(args):\n if not args[i]:\n return ''\n i += 1\n return '1'\n",
"contains": "def evaluate(self, formatter, kwargs, mi, locals,\n val, test, value_if_present, value_if_not):\n if re.search(test, val, flags=re.I):\n return value_if_present\n else:\n return value_if_not\n",
"divide": "def evaluate(self, formatter, kwargs, mi, locals, x, y):\n x = float(x if x else 0)\n y = float(y if y else 0)\n return unicode(x / y)\n",
"uppercase": "def evaluate(self, formatter, kwargs, mi, locals, val):\n return val.upper()\n",
"strcat": "def evaluate(self, formatter, kwargs, mi, locals, *args):\n i = 0\n res = ''\n for i in range(0, len(args)):\n res += args[i]\n return res\n",
"substr": "def evaluate(self, formatter, kwargs, mi, locals, str_, start_, end_):\n return str_[int(start_): len(str_) if int(end_) == 0 else int(end_)]\n",
"in_list": "def evaluate(self, formatter, kwargs, mi, locals, val, sep, pat, fv, nfv):\n l = [v.strip() for v in val.split(sep) if v.strip()]\n if l:\n for v in l:\n if re.search(pat, v, flags=re.I):\n return fv\n return nfv\n",
"not": "def evaluate(self, formatter, kwargs, mi, locals, *args):\n i = 0\n while i < len(args):\n if args[i]:\n return '1'\n i += 1\n return ''\n",
"ifempty": "def evaluate(self, formatter, kwargs, mi, locals, val, value_if_empty):\n if val:\n return val\n else:\n return value_if_empty\n",
"booksize": "def evaluate(self, formatter, kwargs, mi, locals):\n if mi.book_size is not None:\n try:\n return str(mi.book_size)\n except:\n pass\n return ''\n",
"select": "def evaluate(self, formatter, kwargs, mi, locals, val, key):\n if not val:\n return ''\n vals = [v.strip() for v in val.split(',')]\n for v in vals:\n if v.startswith(key+':'):\n return v[len(key)+1:]\n return ''\n",
"field": "def evaluate(self, formatter, kwargs, mi, locals, name):\n return formatter.get_value(name, [], kwargs)\n",
"strcmp": "def evaluate(self, formatter, kwargs, mi, locals, x, y, lt, eq, gt):\n v = strcmp(x, y)\n if v < 0:\n return lt\n if v == 0:\n return eq\n return gt\n",
"first_non_empty": "def evaluate(self, formatter, kwargs, mi, locals, *args):\n i = 0\n while i < len(args):\n if args[i]:\n return args[i]\n i += 1\n return ''\n",
"re": "def evaluate(self, formatter, kwargs, mi, locals, val, pattern, replacement):\n return re.sub(pattern, replacement, val, flags=re.I)\n",
"subtract": "def evaluate(self, formatter, kwargs, mi, locals, x, y):\n x = float(x if x else 0)\n y = float(y if y else 0)\n return unicode(x - y)\n",
"list_item": "def evaluate(self, formatter, kwargs, mi, locals, val, index, sep):\n if not val:\n return ''\n index = int(index)\n val = val.split(sep)\n try:\n return val[index]\n except:\n return ''\n",
"shorten": "def evaluate(self, formatter, kwargs, mi, locals,\n val, leading, center_string, trailing):\n l = max(0, int(leading))\n t = max(0, int(trailing))\n if len(val) > l + len(center_string) + t:\n return val[0:l] + center_string + ('' if t == 0 else val[-t:])\n else:\n return val\n",
"re": "def evaluate(self, formatter, kwargs, mi, locals, val, pattern, replacement):\n return re.sub(pattern, replacement, val)\n",
"field": "def evaluate(self, formatter, kwargs, mi, locals, name):\n return formatter.get_value(name, [], kwargs)\n",
"add": "def evaluate(self, formatter, kwargs, mi, locals, x, y):\n x = float(x if x else 0)\n y = float(y if y else 0)\n return unicode(x + y)\n",
"lookup": "def evaluate(self, formatter, kwargs, mi, locals, val, *args):\n if len(args) == 2: # here for backwards compatibility\n if val:\n return formatter.vformat('{'+args[0].strip()+'}', [], kwargs)\n else:\n return formatter.vformat('{'+args[1].strip()+'}', [], kwargs)\n if (len(args) % 2) != 1:\n raise ValueError(_('lookup requires either 2 or an odd number of arguments'))\n i = 0\n while i < len(args):\n if i + 1 >= len(args):\n return formatter.vformat('{' + args[i].strip() + '}', [], kwargs)\n if re.search(args[i], val):\n return formatter.vformat('{'+args[i+1].strip() + '}', [], kwargs)\n i += 2\n",
"lookup": "def evaluate(self, formatter, kwargs, mi, locals, val, *args):\n if len(args) == 2: # here for backwards compatibility\n if val:\n return formatter.vformat('{'+args[0].strip()+'}', [], kwargs)\n else:\n return formatter.vformat('{'+args[1].strip()+'}', [], kwargs)\n if (len(args) % 2) != 1:\n raise ValueError(_('lookup requires either 2 or an odd number of arguments'))\n i = 0\n while i < len(args):\n if i + 1 >= len(args):\n return formatter.vformat('{' + args[i].strip() + '}', [], kwargs)\n if re.search(args[i], val, flags=re.I):\n return formatter.vformat('{'+args[i+1].strip() + '}', [], kwargs)\n i += 2\n",
"template": "def evaluate(self, formatter, kwargs, mi, locals, template):\n template = template.replace('[[', '{').replace(']]', '}')\n return formatter.__class__().safe_format(template, kwargs, 'TEMPLATE', mi)\n",
"print": "def evaluate(self, formatter, kwargs, mi, locals, *args):\n print args\n return None\n",
"merge_lists": "def evaluate(self, formatter, kwargs, mi, locals, list1, list2, separator):\n l1 = [l.strip() for l in list1.split(separator) if l.strip()]\n l2 = [l.strip() for l in list2.split(separator) if l.strip()]\n lcl1 = set([icu_lower(l) for l in l1])\n res = []\n for i in l1:\n res.append(i)\n for i in l2:\n if icu_lower(i) not in lcl1:\n res.append(i)\n return ', '.join(sorted(res, key=sort_key))\n",
"str_in_list": "def evaluate(self, formatter, kwargs, mi, locals, val, sep, str, fv, nfv):\n l = [v.strip() for v in val.split(sep) if v.strip()]\n c = [v.strip() for v in str.split(sep) if v.strip()]\n if l:\n for v in l:\n for t in c:\n if strcmp(t, v) == 0:\n return fv\n return nfv\n",
"titlecase": "def evaluate(self, formatter, kwargs, mi, locals, val):\n return titlecase(val)\n",
"subitems": "def evaluate(self, formatter, kwargs, mi, locals, val, start_index, end_index):\n if not val:\n return ''\n si = int(start_index)\n ei = int(end_index)\n items = [v.strip() for v in val.split(',')]\n rv = set()\n for item in items:\n component = item.split('.')\n try:\n if ei == 0:\n rv.add('.'.join(component[si:]))\n else:\n rv.add('.'.join(component[si:ei]))\n except:\n pass\n return ', '.join(sorted(rv, key=sort_key))\n",
"sublist": "def evaluate(self, formatter, kwargs, mi, locals, val, start_index, end_index, sep):\n if not val:\n return ''\n si = int(start_index)\n ei = int(end_index)\n val = val.split(sep)\n try:\n if ei == 0:\n return sep.join(val[si:])\n else:\n return sep.join(val[si:ei])\n except:\n return ''\n",
"test": "def evaluate(self, formatter, kwargs, mi, locals, val, value_if_set, value_not_set):\n if val:\n return value_if_set\n else:\n return value_not_set\n",
"eval": "def evaluate(self, formatter, kwargs, mi, locals, template):\n from formatter import eval_formatter\n template = template.replace('[[', '{').replace(']]', '}')\n return eval_formatter.safe_format(template, locals, 'EVAL', None)\n",
"multiply": "def evaluate(self, formatter, kwargs, mi, locals, x, y):\n x = float(x if x else 0)\n y = float(y if y else 0)\n return unicode(x * y)\n",
"format_date": "def evaluate(self, formatter, kwargs, mi, locals, val, format_string):\n if not val:\n return ''\n try:\n dt = parse_date(val)\n s = format_date(dt, format_string)\n except:\n s = 'BAD DATE'\n return s\n",
"format_date": "def evaluate(self, formatter, kwargs, mi, locals, val, format_string):\n if not val or val == 'None':\n return ''\n try:\n dt = parse_date(val)\n s = format_date(dt, format_string)\n except:\n s = 'BAD DATE'\n return s\n",
"capitalize": "def evaluate(self, formatter, kwargs, mi, locals, val):\n return capitalize(val)\n",
"identifier_in_list": "def evaluate(self, formatter, kwargs, mi, locals, val, ident, fv, nfv):\n l = [v.strip() for v in val.split(',') if v.strip()]\n (id, _, regexp) = ident.partition(':')\n if not id:\n return nfv\n id += ':'\n if l:\n for v in l:\n if v.startswith(id):\n if not regexp or re.search(regexp, v[len(id):], flags=re.I):\n return fv\n return nfv\n",
"count": "def evaluate(self, formatter, kwargs, mi, locals, val, sep):\n return unicode(len(val.split(sep)))\n",
"lowercase": "def evaluate(self, formatter, kwargs, mi, locals, val):\n return val.lower()\n",
"strcmp": "def evaluate(self, formatter, kwargs, mi, locals, x, y, lt, eq, gt):\n v = strcmp(x, y)\n if v < 0:\n return lt\n if v == 0:\n return eq\n return gt\n",
"switch": "def evaluate(self, formatter, kwargs, mi, locals, val, *args):\n if (len(args) % 2) != 1:\n raise ValueError(_('switch requires an odd number of arguments'))\n i = 0\n while i < len(args):\n if i + 1 >= len(args):\n return args[i]\n if re.search(args[i], val):\n return args[i+1]\n i += 2\n",
"substr": "def evaluate(self, formatter, kwargs, mi, locals, str_, start_, end_):\n return str_[int(start_): len(str_) if int(end_) == 0 else int(end_)]\n",
"or": "def evaluate(self, formatter, kwargs, mi, locals, *args):\n i = 0\n while i < len(args):\n if args[i]:\n return '1'\n i += 1\n return ''\n",
"switch": "def evaluate(self, formatter, kwargs, mi, locals, val, *args):\n if (len(args) % 2) != 1:\n raise ValueError(_('switch requires an odd number of arguments'))\n i = 0\n while i < len(args):\n if i + 1 >= len(args):\n return args[i]\n if re.search(args[i], val, flags=re.I):\n return args[i+1]\n i += 2\n",
"ondevice": "def evaluate(self, formatter, kwargs, mi, locals):\n if mi.ondevice_col:\n return _('Yes')\n return ''\n",
"assign": "def evaluate(self, formatter, kwargs, mi, locals, target, value):\n locals[target] = value\n return value\n",
"raw_field": "def evaluate(self, formatter, kwargs, mi, locals, name):\n return unicode(getattr(mi, name, None))\n",
"cmp": "def evaluate(self, formatter, kwargs, mi, locals, x, y, lt, eq, gt):\n x = float(x if x else 0)\n y = float(y if y else 0)\n if x < y:\n return lt\n if x == y:\n return eq\n return gt\n"
"cmp": "def evaluate(self, formatter, kwargs, mi, locals, x, y, lt, eq, gt):\n x = float(x if x and x != 'None' else 0)\n y = float(y if y and y != 'None' else 0)\n if x < y:\n return lt\n if x == y:\n return eq\n return gt\n"
}

View File

@ -12,7 +12,9 @@ is64bit = platform.architecture()[0] == '64bit'
iswindows = re.search('win(32|64)', sys.platform)
isosx = 'darwin' in sys.platform
isfreebsd = 'freebsd' in sys.platform
islinux = not isosx and not iswindows and not isfreebsd
isnetbsd = 'netbsd' in sys.platform
isbsd = isnetbsd or isfreebsd
islinux = not isosx and not iswindows and not isbsd
SRC = os.path.abspath('src')
sys.path.insert(0, SRC)
sys.resources_location = os.path.join(os.path.dirname(SRC), 'resources')

View File

@ -11,7 +11,7 @@ from distutils import sysconfig
from PyQt4.pyqtconfig import QtGuiModuleMakefile
from setup import Command, islinux, isfreebsd, isosx, SRC, iswindows
from setup import Command, islinux, isfreebsd, isbsd, isosx, SRC, iswindows
from setup.build_environment import fc_inc, fc_lib, chmlib_inc_dirs, \
fc_error, poppler_libs, poppler_lib_dirs, poppler_inc_dirs, podofo_inc, \
podofo_lib, podofo_error, poppler_error, pyqt, OSX_SDK, NMAKE, \
@ -21,7 +21,7 @@ from setup.build_environment import fc_inc, fc_lib, chmlib_inc_dirs, \
jpg_lib_dirs, chmlib_lib_dirs, sqlite_inc_dirs, icu_inc_dirs, \
icu_lib_dirs
MT
isunix = islinux or isosx or isfreebsd
isunix = islinux or isosx or isbsd
make = 'make' if isunix else NMAKE
@ -205,7 +205,7 @@ if islinux:
ldflags.append('-lpython'+sysconfig.get_python_version())
if isfreebsd:
if isbsd:
cflags.append('-pthread')
ldflags.append('-shared')
cflags.append('-I'+sysconfig.get_python_inc())

View File

@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'
import sys, os, textwrap, subprocess, shutil, tempfile, atexit, stat, shlex
from setup import Command, islinux, isfreebsd, basenames, modules, functions, \
from setup import Command, islinux, isfreebsd, isbsd, basenames, modules, functions, \
__appname__, __version__
HEADER = '''\
@ -116,7 +116,7 @@ class Develop(Command):
def pre_sub_commands(self, opts):
if not (islinux or isfreebsd):
if not (islinux or isbsd):
self.info('\nSetting up a source based development environment is only '
'supported on linux. On other platforms, see the User Manual'
' for help with setting up a development environment.')
@ -156,7 +156,7 @@ class Develop(Command):
self.warn('Failed to compile mount helper. Auto mounting of',
' devices will not work')
if not isfreebsd and os.geteuid() != 0:
if not isbsd and os.geteuid() != 0:
return self.warn('Must be run as root to compile mount helper. Auto '
'mounting of devices will not work.')
src = os.path.join(self.SRC, 'calibre', 'devices', 'linux_mount_helper.c')
@ -168,7 +168,7 @@ class Develop(Command):
ret = p.wait()
if ret != 0:
return warn()
if not isfreebsd:
if not isbsd:
os.chown(dest, 0, 0)
os.chmod(dest, stat.S_ISUID|stat.S_ISGID|stat.S_IRUSR|stat.S_IWUSR|\
stat.S_IXUSR|stat.S_IXGRP|stat.S_IXOTH)

View File

@ -32,7 +32,6 @@ class Win32(VMInstaller):
FREEZE_TEMPLATE = 'python -OO setup.py {freeze_command} --no-ice'
INSTALLER_EXT = 'msi'
SHUTDOWN_CMD = ['shutdown.exe', '-s', '-f', '-t', '0']
BUILD_BUILD = ['python setup.py kakasi',] + VMInstaller.BUILD_BUILD
def download_installer(self):
installer = self.installer()

View File

@ -17,6 +17,7 @@
IncludeMaximum="yes"
OnlyDetect="no"
Language="1033"
MigrateFeatures="yes"
Property="OLDPRODUCTFOUND"/>
<UpgradeVersion Minimum="{version}"
IncludeMinimum="no"
@ -26,6 +27,11 @@
</Upgrade>
<CustomAction Id="PreventDowngrading" Error="Newer version already installed."/>
<Property Id="APPLICATIONFOLDER">
<RegistrySearch Id='calibreInstDir' Type='raw'
Root='HKLM' Key="Software\{app}\Installer" Name="InstallPath" />
</Property>
<Directory Id='TARGETDIR' Name='SourceDir'>
<Merge Id="VCRedist" SourceFile="{crt_msm}" DiskId="1" Language="0"/>
<Directory Id='ProgramFilesFolder' Name='PFiles'>
@ -43,6 +49,9 @@
<Environment Id='UpdatePath' Name='PATH' Action='set' System='yes' Part='last' Value='[APPLICATIONFOLDER]' />
<RegistryValue Root="HKCU" Key="Software\Microsoft\{app}" Name="system_path_updated" Type="integer" Value="1" KeyPath="yes"/>
</Component>
<Component Id="RememberInstallDir" Guid="*">
<RegistryValue Root="HKLM" Key="Software\{app}\Installer" Name="InstallPath" Type="string" Value="[APPLICATIONFOLDER]" KeyPath="yes"/>
</Component>
</DirectoryRef>
<DirectoryRef Id="ApplicationProgramsFolder">
@ -61,7 +70,7 @@
WorkingDirectory="APPLICATIONROOTDIRECTORY" />
<util:InternetShortcut Id="OnlineDocumentationShortcut"
Name="User Manual" Type="url"
Target="http://calibre-ebook.com/user_manual"/>
Target="http://manual.calibre-ebook.com"/>
<util:InternetShortcut Id="GetInvolvedS"
Name="Get Involved" Type="url"
Target="http://calibre-ebook.com/get-involved"/>
@ -87,7 +96,8 @@
ConfigurableDirectory="APPLICATIONFOLDER">
<Feature Id="MainApplication" Title="Program Files" Level="1"
Description="All the files need to run {app}" Absent="disallow">
Description="All the files needed to run {app}" Absent="disallow">
<ComponentRef Id="RememberInstallDir"/>
</Feature>
<Feature Id="VCRedist" Title="Visual C++ 8.0 Runtime" AllowAdvertise="no" Display="hidden" Level="1">
@ -115,7 +125,7 @@
<Property Id="ARPPRODUCTICON" Value="main_icon" />
<Condition
Message="This application is only supported on Windows XP SP2, or higher.">
Message="This application is only supported on Windows XP SP3, or higher.">
<![CDATA[Installed OR (VersionNT >= 501)]]>
</Condition>
<InstallExecuteSequence>

View File

@ -187,7 +187,6 @@ msgstr ""
'''%dict(appname=__appname__, version=version, year=time.strftime('%Y'))
def usage(code, msg=''):
print >> sys.stderr, __doc__ % globals()
if msg:

View File

@ -6,10 +6,10 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, cPickle, re, anydbm, shutil, marshal, zipfile, glob
import os, cPickle, re, shutil, marshal, zipfile, glob
from zlib import compress
from setup import Command, basenames, __appname__, iswindows
from setup import Command, basenames, __appname__
def get_opts_from_parser(parser):
def do_opt(opt):
@ -34,12 +34,12 @@ class Kakasi(Command):
self.records = {}
src = self.j(self.KAKASI_PATH, 'kakasidict.utf8')
dest = self.j(self.RESOURCES, 'localization',
'pykakasi','kanwadict2.db')
'pykakasi','kanwadict2.pickle')
base = os.path.dirname(dest)
if not os.path.exists(base):
os.makedirs(base)
if self.newer(dest, src) or iswindows:
if self.newer(dest, src):
self.info('\tGenerating Kanwadict')
for line in open(src, "r"):
@ -50,7 +50,7 @@ class Kakasi(Command):
dest = self.j(self.RESOURCES, 'localization',
'pykakasi','itaijidict2.pickle')
if self.newer(dest, src) or iswindows:
if self.newer(dest, src):
self.info('\tGenerating Itaijidict')
self.mkitaiji(src, dest)
@ -58,7 +58,7 @@ class Kakasi(Command):
dest = self.j(self.RESOURCES, 'localization',
'pykakasi','kanadict2.pickle')
if self.newer(dest, src) or iswindows:
if self.newer(dest, src):
self.info('\tGenerating kanadict')
self.mkkanadict(src, dest)
@ -75,7 +75,7 @@ class Kakasi(Command):
continue
pair = re.sub(r'\\u([0-9a-fA-F]{4})', lambda x:unichr(int(x.group(1),16)), line)
dic[pair[0]] = pair[1]
cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle
cPickle.dump(dic, open(dst, 'wb'), protocol=-1) #pickle
def mkkanadict(self, src, dst):
dic = {}
@ -87,7 +87,7 @@ class Kakasi(Command):
continue
(alpha, kana) = line.split(' ')
dic[kana] = alpha
cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle
cPickle.dump(dic, open(dst, 'wb'), protocol=-1) #pickle
def parsekdict(self, line):
line = line.decode("utf-8").strip()
@ -115,16 +115,11 @@ class Kakasi(Command):
self.records[key][kanji]=[(yomi, tail)]
def kanwaout(self, out):
try:
# Needed as otherwise anydbm tries to create a gdbm db when the db
# created on Unix is found
os.remove(out)
except:
pass
dic = anydbm.open(out, 'n')
for (k, v) in self.records.iteritems():
dic[k] = compress(marshal.dumps(v))
dic.close()
with open(out, 'wb') as f:
dic = {}
for k, v in self.records.iteritems():
dic[k] = compress(marshal.dumps(v))
cPickle.dump(dic, f, -1)
def clean(self):
kakasi = self.j(self.RESOURCES, 'localization', 'pykakasi')

View File

@ -85,7 +85,7 @@ class Translations(POT):
def mo_file(self, po_file):
locale = os.path.splitext(os.path.basename(po_file))[0]
return locale, os.path.join(self.DEST, locale, 'LC_MESSAGES', 'messages.mo')
return locale, os.path.join(self.DEST, locale, 'messages.mo')
def run(self, opts):
@ -94,9 +94,8 @@ class Translations(POT):
base = os.path.dirname(dest)
if not os.path.exists(base):
os.makedirs(base)
if self.newer(dest, f):
self.info('\tCompiling translations for', locale)
subprocess.check_call(['msgfmt', '-o', dest, f])
self.info('\tCompiling translations for', locale)
subprocess.check_call(['msgfmt', '-o', dest, f])
if locale in ('en_GB', 'nds', 'te', 'yi'):
continue
pycountry = self.j(sysconfig.get_python_lib(), 'pycountry',
@ -123,6 +122,16 @@ class Translations(POT):
shutil.copy2(f, dest)
self.write_stats()
self.freeze_locales()
def freeze_locales(self):
zf = self.DEST + '.zip'
from calibre import CurrentDir
from calibre.utils.zipfile import ZipFile, ZIP_DEFLATED
with ZipFile(zf, 'w', ZIP_DEFLATED) as zf:
with CurrentDir(self.DEST):
zf.add_dir('.')
shutil.rmtree(self.DEST)
@property
def stats(self):

View File

@ -16,7 +16,7 @@ from setup import Command, __version__, installer_name, __appname__
PREFIX = "/var/www/calibre-ebook.com"
DOWNLOADS = PREFIX+"/htdocs/downloads"
BETAS = DOWNLOADS +'/betas'
USER_MANUAL = PREFIX+'/htdocs/user_manual'
USER_MANUAL = '/var/www/localhost/htdocs/'
HTML2LRF = "calibre/ebooks/lrf/html/demo"
TXT2LRF = "src/calibre/ebooks/lrf/txt/demo"
MOBILEREAD = 'ftp://dev.mobileread.com/calibre/'
@ -365,7 +365,7 @@ class UploadUserManual(Command): # {{{
self.build_plugin_example(x)
check_call(' '.join(['scp', '-r', 'src/calibre/manual/.build/html/*',
'divok:%s'%USER_MANUAL]), shell=True)
'bugs:%s'%USER_MANUAL]), shell=True)
# }}}
class UploadDemo(Command): # {{{

View File

@ -12,10 +12,10 @@ from functools import partial
warnings.simplefilter('ignore', DeprecationWarning)
from calibre.constants import (iswindows, isosx, islinux, isfreebsd, isfrozen,
preferred_encoding, __appname__, __version__, __author__,
win32event, win32api, winerror, fcntl,
filesystem_encoding, plugins, config_dir)
from calibre.constants import (iswindows, isosx, islinux, isfrozen,
isbsd, preferred_encoding, __appname__, __version__, __author__,
win32event, win32api, winerror, fcntl,
filesystem_encoding, plugins, config_dir)
from calibre.startup import winutil, winutilerror
if False and islinux and not getattr(sys, 'frozen', False):
@ -31,7 +31,7 @@ if False:
# Prevent pyflakes from complaining
winutil, winutilerror, __appname__, islinux, __version__
fcntl, win32event, isfrozen, __author__
winerror, win32api, isfreebsd
winerror, win32api, isbsd
_mt_inited = False
def _init_mimetypes():
@ -630,6 +630,24 @@ def human_readable(size):
size = size[:-2]
return size + " " + suffix
def remove_bracketed_text(src,
brackets={u'(':u')', u'[':u']', u'{':u'}'}):
from collections import Counter
counts = Counter()
buf = []
src = force_unicode(src)
rmap = dict([(v, k) for k, v in brackets.iteritems()])
for char in src:
if char in brackets:
counts[char] += 1
elif char in rmap:
idx = rmap[char]
if counts[idx] > 0:
counts[idx] -= 1
elif sum(counts.itervalues()) < 1:
buf.append(char)
return u''.join(buf)
if isosx:
import glob, shutil
fdir = os.path.expanduser('~/.fonts')

View File

@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = u'calibre'
numeric_version = (0, 8, 1)
numeric_version = (0, 8, 4)
__version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"
@ -27,7 +27,9 @@ iswindows = 'win32' in _plat or 'win64' in _plat
isosx = 'darwin' in _plat
isnewosx = isosx and getattr(sys, 'new_app_bundle', False)
isfreebsd = 'freebsd' in _plat
islinux = not(iswindows or isosx or isfreebsd)
isnetbsd = 'netbsd' in _plat
isbsd = isfreebsd or isnetbsd
islinux = not(iswindows or isosx or isbsd)
isfrozen = hasattr(sys, 'frozen')
isunix = isosx or islinux

View File

@ -607,10 +607,25 @@ class StoreBase(Plugin): # {{{
supported_platforms = ['windows', 'osx', 'linux']
author = 'John Schember'
type = _('Store')
# Information about the store. Should be in the primary language
# of the store. This should not be translatable when set by
# a subclass.
description = _('An ebook store.')
minimum_calibre_version = (0, 8, 0)
version = (1, 0, 1)
actual_plugin = None
# Does the store only distribute ebooks without DRM.
drm_free_only = False
# This is the 2 letter country code for the corporate
# headquarters of the store.
headquarters = ''
# All formats the store distributes ebooks in.
formats = []
# Is this store on an affiliate program?
affiliate = False
def load_actual_plugin(self, gui):
'''
This method must return the actual interface action plugin object.

View File

@ -1,4 +1,5 @@
import os.path
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
@ -593,7 +594,7 @@ from calibre.devices.iliad.driver import ILIAD
from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
from calibre.devices.jetbook.driver import JETBOOK, MIBUK, JETBOOK_MINI
from calibre.devices.kindle.driver import KINDLE, KINDLE2, KINDLE_DX
from calibre.devices.nook.driver import NOOK, NOOK_COLOR
from calibre.devices.nook.driver import NOOK, NOOK_COLOR, NOOK_TSR
from calibre.devices.prs505.driver import PRS505
from calibre.devices.user_defined.driver import USER_DEFINED
from calibre.devices.android.driver import ANDROID, S60
@ -692,8 +693,7 @@ plugins += [
KINDLE,
KINDLE2,
KINDLE_DX,
NOOK,
NOOK_COLOR,
NOOK, NOOK_COLOR, NOOK_TSR,
PRS505,
ANDROID,
S60,
@ -854,6 +854,17 @@ class ActionStore(InterfaceActionBase):
author = 'John Schember'
actual_plugin = 'calibre.gui2.actions.store:StoreAction'
def customization_help(self, gui=False):
return 'Customize the behavior of the store search.'
def config_widget(self):
from calibre.gui2.store.config.store import config_widget as get_cw
return get_cw()
def save_settings(self, config_widget):
from calibre.gui2.store.config.store import save_settings as save
save(config_widget)
plugins += [ActionAdd, ActionFetchAnnotations, ActionGenerateCatalog,
ActionConvert, ActionDelete, ActionEditMetadata, ActionView,
ActionFetchNews, ActionSaveToDisk, ActionShowBookDetails,
@ -1094,121 +1105,353 @@ plugins += [LookAndFeel, Behavior, Columns, Toolbar, Search, InputOptions,
# Store plugins {{{
class StoreAmazonKindleStore(StoreBase):
name = 'Amazon Kindle'
description = _('Kindle books from Amazon')
description = u'Kindle books from Amazon.'
actual_plugin = 'calibre.gui2.store.amazon_plugin:AmazonKindleStore'
headquarters = 'US'
formats = ['KINDLE']
affiliate = True
class StoreAmazonDEKindleStore(StoreBase):
name = 'Amazon DE Kindle'
description = _('Kindle eBooks')
author = 'Charles Haley'
description = u'Kindle Bücher von Amazon.'
actual_plugin = 'calibre.gui2.store.amazon_de_plugin:AmazonDEKindleStore'
headquarters = 'DE'
formats = ['KINDLE']
affiliate = True
class StoreAmazonUKKindleStore(StoreBase):
name = 'Amazon UK Kindle'
description = _('Kindle books from Amazon.uk')
author = 'Charles Haley'
description = u'Kindle books from Amazon\'s UK web site. Also, includes French language ebooks.'
actual_plugin = 'calibre.gui2.store.amazon_uk_plugin:AmazonUKKindleStore'
headquarters = 'UK'
formats = ['KINDLE']
affiliate = True
class StoreArchiveOrgStore(StoreBase):
name = 'Archive.org'
description = u'An Internet library offering permanent access for researchers, historians, scholars, people with disabilities, and the general public to historical collections that exist in digital format.'
actual_plugin = 'calibre.gui2.store.archive_org_plugin:ArchiveOrgStore'
drm_free_only = True
headquarters = 'US'
formats = ['DAISY', 'DJVU', 'EPUB', 'MOBI', 'PDF', 'TXT']
class StoreBaenWebScriptionStore(StoreBase):
name = 'Baen WebScription'
description = _('Ebooks for readers.')
description = u'Sci-Fi & Fantasy brought to you by Jim Baen.'
actual_plugin = 'calibre.gui2.store.baen_webscription_plugin:BaenWebScriptionStore'
drm_free_only = True
headquarters = 'US'
formats = ['EPUB', 'LIT', 'LRF', 'MOBI', 'RB', 'RTF', 'ZIP']
class StoreBNStore(StoreBase):
name = 'Barnes and Noble'
description = _('Books, Textbooks, eBooks, Toys, Games and More.')
description = u'The world\'s largest book seller. As the ultimate destination for book lovers, Barnes & Noble.com offers an incredible array of content.'
actual_plugin = 'calibre.gui2.store.bn_plugin:BNStore'
headquarters = 'US'
formats = ['NOOK']
affiliate = True
class StoreBeamEBooksDEStore(StoreBase):
name = 'Beam EBooks DE'
description = _('der eBook Shop')
author = 'Charles Haley'
description = u'Bei uns finden Sie: Tausende deutschsprachige eBooks; Alle eBooks ohne hartes DRM; PDF, ePub und Mobipocket Format; Sofortige Verfügbarkeit - 24 Stunden am Tag; Günstige Preise; eBooks für viele Lesegeräte, PC,Mac und Smartphones; Viele Gratis eBooks'
actual_plugin = 'calibre.gui2.store.beam_ebooks_de_plugin:BeamEBooksDEStore'
drm_free_only = True
headquarters = 'DE'
formats = ['EPUB', 'MOBI', 'PDF']
affiliate = True
class StoreBeWriteStore(StoreBase):
name = 'BeWrite Books'
description = _('Publishers of fine books.')
description = u'Publishers of fine books. Highly selective and editorially driven. Does not offer: books for children or exclusively YA, erotica, swords-and-sorcery fantasy and space-opera-style science fiction. All other genres are represented.'
actual_plugin = 'calibre.gui2.store.bewrite_plugin:BeWriteStore'
drm_free_only = True
headquarters = 'US'
formats = ['EPUB', 'MOBI', 'PDF']
class StoreDieselEbooksStore(StoreBase):
name = 'Diesel eBooks'
description = _('World Famous eBook Store.')
description = u'Instant access to over 2.4 million titles from hundreds of publishers including Harlequin, HarperCollins, John Wiley & Sons, McGraw-Hill, Simon & Schuster and Random House.'
actual_plugin = 'calibre.gui2.store.diesel_ebooks_plugin:DieselEbooksStore'
headquarters = 'US'
formats = ['EPUB', 'PDF']
affiliate = True
class StoreEbookscomStore(StoreBase):
name = 'eBooks.com'
description = _('The digital bookstore.')
description = u'Sells books in multiple electronic formats in all categories. Technical infrastructure is cutting edge, robust and scalable, with servers in the US and Europe.'
actual_plugin = 'calibre.gui2.store.ebooks_com_plugin:EbookscomStore'
headquarters = 'US'
formats = ['EPUB', 'LIT', 'MOBI', 'PDF']
affiliate = True
class StoreEPubBuyDEStore(StoreBase):
name = 'EPUBBuy DE'
description = _('EPUBReaders eBook Shop')
author = 'Charles Haley'
description = u'Bei EPUBBuy.com finden Sie ausschliesslich eBooks im weitverbreiteten EPUB-Format und ohne DRM. So haben Sie die freie Wahl, wo Sie Ihr eBook lesen: Tablet, eBook-Reader, Smartphone oder einfach auf Ihrem PC. So macht eBook-Lesen Spaß!'
actual_plugin = 'calibre.gui2.store.epubbuy_de_plugin:EPubBuyDEStore'
drm_free_only = True
headquarters = 'DE'
formats = ['EPUB']
affiliate = True
class StoreEBookShoppeUKStore(StoreBase):
name = 'ebookShoppe UK'
author = u'Charles Haley'
description = u'We made this website in an attempt to offer the widest range of UK eBooks possible across and as many formats as we could manage.'
actual_plugin = 'calibre.gui2.store.ebookshoppe_uk_plugin:EBookShoppeUKStore'
headquarters = 'UK'
formats = ['EPUB', 'PDF']
affiliate = True
class StoreEHarlequinStore(StoreBase):
name = 'eHarlequin'
description = _('entertain, enrich, inspire.')
description = u'A global leader in series romance and one of the world\'s leading publishers of books for women. Offers women a broad range of reading from romance to bestseller fiction, from young adult novels to erotic literature, from nonfiction to fantasy, from African-American novels to inspirational romance, and more.'
actual_plugin = 'calibre.gui2.store.eharlequin_plugin:EHarlequinStore'
headquarters = 'CA'
formats = ['EPUB', 'PDF']
affiliate = True
class StoreEpubBudStore(StoreBase):
name = 'ePub Bud'
description = 'Well, it\'s pretty much just "YouTube for Children\'s eBooks. A not-for-profit organization devoted to brining self published childrens books to the world.'
actual_plugin = 'calibre.gui2.store.epubbud_plugin:EpubBudStore'
drm_free_only = True
headquarters = 'US'
formats = ['EPUB']
class StoreFeedbooksStore(StoreBase):
name = 'Feedbooks'
description = _('Read anywhere.')
description = u'Feedbooks is a cloud publishing and distribution service, connected to a large ecosystem of reading systems and social networks. Provides a variety of genres from independent and classic books.'
actual_plugin = 'calibre.gui2.store.feedbooks_plugin:FeedbooksStore'
headquarters = 'FR'
formats = ['EPUB', 'MOBI', 'PDF']
class StoreFoylesUKStore(StoreBase):
name = 'Foyles UK'
description = _('Foyles of London, online')
author = 'Charles Haley'
description = u'Foyles of London\'s ebook store. Provides extensive range covering all subjects.'
actual_plugin = 'calibre.gui2.store.foyles_uk_plugin:FoylesUKStore'
headquarters = 'UK'
formats = ['EPUB', 'PDF']
affiliate = True
class StoreGandalfStore(StoreBase):
name = 'Gandalf'
author = u'Tomasz Długosz'
description = u'Księgarnia internetowa Gandalf.'
actual_plugin = 'calibre.gui2.store.gandalf_plugin:GandalfStore'
headquarters = 'PL'
formats = ['EPUB', 'PDF']
class StoreGoogleBooksStore(StoreBase):
name = 'Google Books'
description = u'Google Books'
actual_plugin = 'calibre.gui2.store.google_books_plugin:GoogleBooksStore'
headquarters = 'US'
formats = ['EPUB', 'PDF', 'TXT']
class StoreGutenbergStore(StoreBase):
name = 'Project Gutenberg'
description = _('The first producer of free ebooks.')
description = u'The first producer of free ebooks. Free in the United States because their copyright has expired. They may not be free of copyright in other countries. Readers outside of the United States must check the copyright laws of their countries before downloading or redistributing our ebooks.'
actual_plugin = 'calibre.gui2.store.gutenberg_plugin:GutenbergStore'
drm_free_only = True
headquarters = 'US'
formats = ['EPUB', 'HTML', 'MOBI', 'PDB', 'TXT']
class StoreKoboStore(StoreBase):
name = 'Kobo'
description = _('eReading: anytime. anyplace.')
description = u'With over 2.3 million eBooks to browse we have engaged readers in over 200 countries in Kobo eReading. Our eBook listings include New York Times Bestsellers, award winners, classics and more!'
actual_plugin = 'calibre.gui2.store.kobo_plugin:KoboStore'
headquarters = 'CA'
formats = ['EPUB']
affiliate = True
class StoreLegimiStore(StoreBase):
name = 'Legimi'
author = u'Tomasz Długosz'
description = u'Tanie oraz darmowe ebooki, egazety i blogi w formacie EPUB, wprost na Twój e-czytnik, iPhone, iPad, Android i komputer'
actual_plugin = 'calibre.gui2.store.legimi_plugin:LegimiStore'
headquarters = 'PL'
formats = ['EPUB']
class StoreManyBooksStore(StoreBase):
name = 'ManyBooks'
description = _('The best ebooks at the best price: free!')
description = u'Public domain and creative commons works from many sources.'
actual_plugin = 'calibre.gui2.store.manybooks_plugin:ManyBooksStore'
drm_free_only = True
headquarters = 'US'
formats = ['EPUB', 'FB2', 'JAR', 'LIT', 'LRF', 'MOBI', 'PDB', 'PDF', 'RB', 'RTF', 'TCR', 'TXT', 'ZIP']
class StoreMobileReadStore(StoreBase):
name = 'MobileRead'
description = _('Ebooks handcrafted with the utmost care')
description = u'Ebooks handcrafted with the utmost care.'
actual_plugin = 'calibre.gui2.store.mobileread.mobileread_plugin:MobileReadStore'
drm_free_only = True
headquarters = 'CH'
formats = ['EPUB', 'IMP', 'LRF', 'LIT', 'MOBI', 'PDF']
class StoreNextoStore(StoreBase):
name = 'Nexto'
author = u'Tomasz Długosz'
description = u'Największy w Polsce sklep internetowy z audiobookami mp3, ebookami pdf oraz prasą do pobrania on-line.'
actual_plugin = 'calibre.gui2.store.nexto_plugin:NextoStore'
headquarters = 'PL'
formats = ['EPUB', 'PDF']
affiliate = True
class StoreOpenLibraryStore(StoreBase):
name = 'Open Library'
description = _('One web page for every book.')
description = u'One web page for every book ever published. The goal is to be a true online library. Over 20 million records from a variety of large catalogs as well as single contributions, with more on the way.'
actual_plugin = 'calibre.gui2.store.open_library_plugin:OpenLibraryStore'
drm_free_only = True
headquarters = 'US'
formats = ['DAISY', 'DJVU', 'EPUB', 'MOBI', 'PDF', 'TXT']
class StoreOReillyStore(StoreBase):
name = 'OReilly'
description = u'Programming and tech ebooks from OReilly.'
actual_plugin = 'calibre.gui2.store.oreilly_plugin:OReillyStore'
drm_free_only = True
headquarters = 'US'
formats = ['APK', 'DAISY', 'EPUB', 'MOBI', 'PDF']
class StorePragmaticBookshelfStore(StoreBase):
name = 'Pragmatic Bookshelf'
description = u'The Pragmatic Bookshelf\'s collection of programming and tech books avaliable as ebooks.'
actual_plugin = 'calibre.gui2.store.pragmatic_bookshelf_plugin:PragmaticBookshelfStore'
drm_free_only = True
headquarters = 'US'
formats = ['EPUB', 'MOBI', 'PDF']
class StoreSmashwordsStore(StoreBase):
name = 'Smashwords'
description = _('Your ebook. Your way.')
description = u'An ebook publishing and distribution platform for ebook authors, publishers and readers. Covers many genres and formats.'
actual_plugin = 'calibre.gui2.store.smashwords_plugin:SmashwordsStore'
drm_free_only = True
headquarters = 'US'
formats = ['EPUB', 'HTML', 'LRF', 'MOBI', 'PDB', 'RTF', 'TXT']
affiliate = True
class StoreVirtualoStore(StoreBase):
name = 'Virtualo'
author = u'Tomasz Długosz'
description = u'Księgarnia internetowa, która oferuje bezpieczny i szeroki dostęp do książek w formie cyfrowej.'
actual_plugin = 'calibre.gui2.store.virtualo_plugin:VirtualoStore'
headquarters = 'PL'
formats = ['EPUB', 'PDF']
class StoreWaterstonesUKStore(StoreBase):
name = 'Waterstones UK'
description = _('Feel every word')
author = 'Charles Haley'
description = u'Waterstone\'s mission is to be the leading Bookseller on the High Street and online providing customers the widest choice, great value and expert advice from a team passionate about Bookselling.'
actual_plugin = 'calibre.gui2.store.waterstones_uk_plugin:WaterstonesUKStore'
headquarters = 'UK'
formats = ['EPUB', 'PDF']
class StoreWeightlessBooksStore(StoreBase):
name = 'Weightless Books'
description = '(e)Books That Don\'t Weigh You Down'
description = u'An independent DRM-free ebooksite devoted to ebooks of all sorts.'
actual_plugin = 'calibre.gui2.store.weightless_books_plugin:WeightlessBooksStore'
drm_free_only = True
headquarters = 'US'
formats = ['EPUB', 'HTML', 'LIT', 'MOBI', 'PDF']
class StoreWHSmithUKStore(StoreBase):
name = 'WH Smith UK'
author = 'Charles Haley'
description = u"Shop for savings on Books, discounted Magazine subscriptions and great prices on Stationery, Toys & Games"
actual_plugin = 'calibre.gui2.store.whsmith_uk_plugin:WHSmithUKStore'
headquarters = 'UK'
formats = ['EPUB', 'PDF']
class StoreWizardsTowerBooksStore(StoreBase):
name = 'Wizards Tower Books'
description = 'Wizard\'s Tower Press'
description = u'A science fiction and fantasy publisher. Concentrates mainly on making out-of-print works available once more as e-books, and helping other small presses exploit the e-book market. Also publishes a small number of limited-print-run anthologies with a view to encouraging diversity in the science fiction and fantasy field.'
actual_plugin = 'calibre.gui2.store.wizards_tower_books_plugin:WizardsTowerBooksStore'
plugins += [StoreAmazonKindleStore, StoreAmazonDEKindleStore, StoreAmazonUKKindleStore,
StoreBaenWebScriptionStore, StoreBNStore,
StoreBeamEBooksDEStore, StoreBeWriteStore,
StoreDieselEbooksStore, StoreEbookscomStore, StoreEPubBuyDEStore,
StoreEHarlequinStore, StoreFeedbooksStore,
StoreFoylesUKStore, StoreGutenbergStore, StoreKoboStore, StoreManyBooksStore,
StoreMobileReadStore, StoreOpenLibraryStore, StoreSmashwordsStore,
StoreWaterstonesUKStore, StoreWeightlessBooksStore, StoreWizardsTowerBooksStore]
drm_free_only = True
headquarters = 'UK'
formats = ['EPUB', 'MOBI']
class StoreWoblinkStore(StoreBase):
name = 'Woblink'
author = u'Tomasz Długosz'
description = u'Czytanie zdarza się wszędzie!'
actual_plugin = 'calibre.gui2.store.woblink_plugin:WoblinkStore'
headquarters = 'PL'
formats = ['EPUB']
plugins += [
StoreArchiveOrgStore,
StoreAmazonKindleStore,
StoreAmazonDEKindleStore,
StoreAmazonUKKindleStore,
StoreBaenWebScriptionStore,
StoreBNStore,
StoreBeamEBooksDEStore,
StoreBeWriteStore,
StoreDieselEbooksStore,
StoreEbookscomStore,
StoreEBookShoppeUKStore,
StoreEPubBuyDEStore,
StoreEHarlequinStore,
StoreEpubBudStore,
StoreFeedbooksStore,
StoreFoylesUKStore,
StoreGandalfStore,
StoreGoogleBooksStore,
StoreGutenbergStore,
StoreKoboStore,
StoreLegimiStore,
StoreManyBooksStore,
StoreMobileReadStore,
StoreNextoStore,
StoreOpenLibraryStore,
StoreOReillyStore,
StorePragmaticBookshelfStore,
StoreSmashwordsStore,
StoreVirtualoStore,
StoreWaterstonesUKStore,
StoreWeightlessBooksStore,
StoreWHSmithUKStore,
StoreWizardsTowerBooksStore,
StoreWoblinkStore
]
# }}}

View File

@ -216,9 +216,26 @@ def store_plugins():
customization = config['plugin_customization']
for plugin in _initialized_plugins:
if isinstance(plugin, Store):
if not is_disabled(plugin):
plugin.site_customization = customization.get(plugin.name, '')
yield plugin
plugin.site_customization = customization.get(plugin.name, '')
yield plugin
def available_store_plugins():
for plugin in store_plugins():
if not is_disabled(plugin):
yield plugin
def stores():
stores = set([])
for plugin in store_plugins():
stores.add(plugin.name)
return stores
def available_stores():
stores = set([])
for plugin in available_store_plugins():
stores.add(plugin.name)
return stores
# }}}
# Metadata read/write {{{

View File

@ -52,6 +52,8 @@ class ANDROID(USBMS):
0x04e8 : { 0x681d : [0x0222, 0x0223, 0x0224, 0x0400],
0x681c : [0x0222, 0x0224, 0x0400],
0x6640 : [0x0100],
0x685e : [0x0400],
0x6860 : [0x0400],
0x6877 : [0x0400],
},
@ -59,7 +61,7 @@ class ANDROID(USBMS):
0x0489 : { 0xc001 : [0x0226], 0xc004 : [0x0226], },
# Acer
0x502 : { 0x3203 : [0x0100]},
0x502 : { 0x3203 : [0x0100, 0x224]},
# Dell
0x413c : { 0xb007 : [0x0100, 0x0224, 0x0226]},
@ -109,10 +111,12 @@ class ANDROID(USBMS):
'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD',
'7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2',
'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB', 'STREAK']
'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB', 'STREAK',
'MB525']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD']
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
'__UMS_COMPOSITE']
OSX_MAIN_MEM = 'Android Device Main Memory'

View File

@ -941,7 +941,7 @@ class ITUNES(DriverBase):
# declared in use_plugboard_ext and a device name of ITUNES
if DEBUG:
self.log.info("ITUNES.set_plugboard()")
#self.log.info(' using plugboard %s' % plugboards)
#self.log.info(' plugboard: %s' % plugboards)
self.plugboards = plugboards
self.plugboard_func = pb_func
@ -1052,7 +1052,6 @@ class ITUNES(DriverBase):
'title': metadata[i].title,
'uuid': metadata[i].uuid }
# Report progress
if self.report_progress is not None:
self.report_progress((i+1)/file_count, _('%d of %d') % (i+1, file_count))
@ -2393,6 +2392,16 @@ class ITUNES(DriverBase):
self.iTunes.Windows[0].Minimized = True
self.initial_status = 'launched'
try:
# Pre-emptive test to confirm functional iTunes automation interface
foo = self.iTunes.Version
foo
except:
self.iTunes = None
raise OpenFeedback('Unable to connect to iTunes.\n' +
' iTunes automation interface non-responsive, ' +
'recommend reinstalling iTunes')
# Read the current storage path for iTunes media from the XML file
media_dir = ''
string = None
@ -2744,7 +2753,6 @@ class ITUNES(DriverBase):
# Update metadata from plugboard
# If self.plugboard is None (no transforms), original metadata is returned intact
metadata_x = self._xform_metadata_via_plugboard(metadata, this_book.format)
if isosx:
if lb_added:
lb_added.name.set(metadata_x.title)
@ -2754,8 +2762,7 @@ class ITUNES(DriverBase):
lb_added.description.set("%s %s" % (self.description_prefix,strftime('%Y-%m-%d %H:%M:%S')))
lb_added.enabled.set(True)
lb_added.sort_artist.set(icu_title(metadata_x.author_sort))
lb_added.sort_name.set(metadata.title_sort)
lb_added.sort_name.set(metadata_x.title_sort)
if db_added:
db_added.name.set(metadata_x.title)
@ -2765,7 +2772,7 @@ class ITUNES(DriverBase):
db_added.description.set("%s %s" % (self.description_prefix,strftime('%Y-%m-%d %H:%M:%S')))
db_added.enabled.set(True)
db_added.sort_artist.set(icu_title(metadata_x.author_sort))
db_added.sort_name.set(metadata.title_sort)
db_added.sort_name.set(metadata_x.title_sort)
if metadata_x.comments:
if lb_added:
@ -2785,6 +2792,7 @@ class ITUNES(DriverBase):
# Set genre from series if available, else first alpha tag
# Otherwise iTunes grabs the first dc:subject from the opf metadata
# If title_sort applied in plugboard, that overrides using series/index as title_sort
if metadata_x.series and self.settings().extra_customization[self.USE_SERIES_AS_CATEGORY]:
if DEBUG:
self.log.info(" ITUNES._update_iTunes_metadata()")
@ -2796,7 +2804,9 @@ class ITUNES(DriverBase):
fraction = index-integer
series_index = '%04d%s' % (integer, str('%0.4f' % fraction).lstrip('0'))
if lb_added:
lb_added.sort_name.set("%s %s" % (self.title_sorter(metadata_x.series), series_index))
# If no title_sort plugboard tweak, create sort_name from series/index
if metadata.title_sort == metadata_x.title_sort:
lb_added.sort_name.set("%s %s" % (self.title_sorter(metadata_x.series), series_index))
lb_added.episode_ID.set(metadata_x.series)
lb_added.episode_number.set(metadata_x.series_index)
@ -2810,7 +2820,9 @@ class ITUNES(DriverBase):
break
if db_added:
db_added.sort_name.set("%s %s" % (self.title_sorter(metadata_x.series), series_index))
# If no title_sort plugboard tweak, create sort_name from series/index
if metadata.title_sort == metadata_x.title_sort:
db_added.sort_name.set("%s %s" % (self.title_sorter(metadata_x.series), series_index))
db_added.episode_ID.set(metadata_x.series)
db_added.episode_number.set(metadata_x.series_index)
@ -2845,7 +2857,7 @@ class ITUNES(DriverBase):
lb_added.Description = ("%s %s" % (self.description_prefix,strftime('%Y-%m-%d %H:%M:%S')))
lb_added.Enabled = True
lb_added.SortArtist = icu_title(metadata_x.author_sort)
lb_added.SortName = metadata.title_sort
lb_added.SortName = metadata_x.title_sort
if db_added:
db_added.Name = metadata_x.title
@ -2855,7 +2867,7 @@ class ITUNES(DriverBase):
db_added.Description = ("%s %s" % (self.description_prefix,strftime('%Y-%m-%d %H:%M:%S')))
db_added.Enabled = True
db_added.SortArtist = icu_title(metadata_x.author_sort)
db_added.SortName = metadata.title_sort
db_added.SortName = metadata_x.title_sort
if metadata_x.comments:
if lb_added:
@ -2888,7 +2900,9 @@ class ITUNES(DriverBase):
fraction = index-integer
series_index = '%04d%s' % (integer, str('%0.4f' % fraction).lstrip('0'))
if lb_added:
lb_added.SortName = "%s %s" % (self.title_sorter(metadata_x.series), series_index)
# If no title_sort plugboard tweak, create sort_name from series/index
if metadata.title_sort == metadata_x.title_sort:
lb_added.SortName = "%s %s" % (self.title_sorter(metadata_x.series), series_index)
lb_added.EpisodeID = metadata_x.series
try:
@ -2914,7 +2928,9 @@ class ITUNES(DriverBase):
break
if db_added:
db_added.SortName = "%s %s" % (self.title_sorter(metadata_x.series), series_index)
# If no title_sort plugboard tweak, create sort_name from series/index
if metadata.title_sort == metadata_x.title_sort:
db_added.SortName = "%s %s" % (self.title_sorter(metadata_x.series), series_index)
db_added.EpisodeID = metadata_x.series
try:
@ -2975,11 +2991,13 @@ class ITUNES(DriverBase):
newmi.publisher if book.publisher != newmi.publisher else ''))
self.log.info(" tags: %s %s" % (book.tags, ">>> %s" %
newmi.tags if book.tags != newmi.tags else ''))
else:
self.log(" matching plugboard not found")
else:
newmi = book
return newmi
class ITUNES_ASYNC(ITUNES):
'''
This subclass allows the user to interact directly with iTunes via a menu option
@ -3014,6 +3032,8 @@ class ITUNES_ASYNC(ITUNES):
pythoncom.CoInitialize()
self._launch_iTunes()
except:
import traceback
traceback.print_exc()
raise UserFeedback('unable to launch iTunes', details=None, level=UserFeedback.WARN)
finally:
pythoncom.CoUninitialize()

View File

@ -35,8 +35,8 @@ class EB600(USBMS):
PRODUCT_ID = [0x1688]
BCD = [0x110]
VENDOR_NAME = 'NETRONIX'
WINDOWS_MAIN_MEM = 'EBOOK'
VENDOR_NAME = ['NETRONIX', 'WOLDER']
WINDOWS_MAIN_MEM = ['EBOOK', 'MIBUK_GAMMA_6.2']
WINDOWS_CARD_A_MEM = 'EBOOK'
OSX_MAIN_MEM = 'EB600 Internal Storage Media'
@ -95,9 +95,8 @@ class POCKETBOOK360(EB600):
FORMATS = ['epub', 'fb2', 'prc', 'mobi', 'pdf', 'djvu', 'rtf', 'chm', 'txt']
VENDOR_NAME = 'PHILIPS'
WINDOWS_MAIN_MEM = 'MASS_STORGE'
WINDOWS_CARD_A_MEM = 'MASS_STORGE'
VENDOR_NAME = ['PHILIPS', '__POCKET']
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['MASS_STORGE', 'BOOK_USB_STORAGE']
OSX_MAIN_MEM = 'Philips Mass Storge Media'
OSX_CARD_A_MEM = 'Philips Mass Storge Media'

View File

@ -8,10 +8,10 @@ from ctypes import cdll, POINTER, byref, pointer, Structure as _Structure, \
c_ubyte, c_ushort, c_int, c_char, c_void_p, c_byte, c_uint
from errno import EBUSY, ENOMEM
from calibre import iswindows, isosx, isfreebsd, load_library
from calibre import iswindows, isosx, isbsd, load_library
_libusb_name = 'libusb'
PATH_MAX = 511 if iswindows else 1024 if (isosx or isfreebsd) else 4096
PATH_MAX = 511 if iswindows else 1024 if (isosx or isbsd) else 4096
if iswindows:
class Structure(_Structure):
_pack_ = 1

View File

@ -224,13 +224,16 @@ class TREKSTOR(USBMS):
FORMATS = ['epub', 'txt', 'pdf']
VENDOR_ID = [0x1e68]
PRODUCT_ID = [0x0041, 0x0042]
PRODUCT_ID = [0x0041, 0x0042,
0x003e # This is for the EBOOK_PLAYER_5M https://bugs.launchpad.net/bugs/792091
]
BCD = [0x0002]
EBOOK_DIR_MAIN = 'Ebooks'
VENDOR_NAME = 'TREKSTOR'
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'EBOOK_PLAYER_7'
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['EBOOK_PLAYER_7',
'EBOOK_PLAYER_5M']
class EEEREADER(USBMS):
@ -269,8 +272,8 @@ class NEXTBOOK(USBMS):
EBOOK_DIR_MAIN = ''
VENDOR_NAME = 'NEXT2'
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = '1.0.14'
VENDOR_NAME = ['NEXT2', 'BK7005']
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['1.0.14', 'PLAYER']
SUPPORTS_SUB_DIRS = True
THUMBNAIL_HEIGHT = 120

View File

@ -107,3 +107,19 @@ class NOOK_COLOR(NOOK):
return filepath
def upload_cover(self, path, filename, metadata, filepath):
pass
class NOOK_TSR(NOOK):
gui_name = _('Nook Simple')
description = _('Communicate with the Nook TSR eBook reader.')
PRODUCT_ID = [0x003]
BCD = [0x216]
EBOOK_DIR_MAIN = EBOOK_DIR_CARD_A = 'My Files/Books'
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'EBOOK_DISK'
def upload_cover(self, path, filename, metadata, filepath):
pass

View File

@ -204,7 +204,8 @@ class CollectionsBookList(BookList):
elif fm['datatype'] == 'text' and fm['is_multiple']:
val = orig_val
elif fm['datatype'] == 'composite' and fm['is_multiple']:
val = [v.strip() for v in val.split(fm['is_multiple'])]
val = [v.strip() for v in
val.split(fm['is_multiple']['ui_to_list'])]
else:
val = [val]

View File

@ -926,8 +926,8 @@ class Device(DeviceConfig, DevicePlugin):
if not isinstance(template, unicode):
template = template.decode('utf-8')
app_id = str(getattr(mdata, 'application_id', ''))
# The db id will be in the created filename
extra_components = get_components(template, mdata, fname,
id_ = mdata.get('id', fname)
extra_components = get_components(template, mdata, id_,
timefmt=opts.send_timefmt, length=maxlen-len(app_id)-1)
if not extra_components:
extra_components.append(sanitize(self.filename_callback(fname,

View File

@ -40,7 +40,7 @@ To get help on them specify the input and output file and then use the -h \
option.
For full documentation of the conversion system see
''') + 'http://calibre-ebook.com/user_manual/conversion.html'
''') + 'http://manual.calibre-ebook.com/conversion.html'
HEURISTIC_OPTIONS = ['markup_chapter_headings',
'italicize_common_cases', 'fix_indents',

View File

@ -101,7 +101,7 @@ class Container(object):
return None
return existing[0]
def add_name_to_manifest(self, name):
def add_name_to_manifest(self, name, mt=None):
item = self.manifest_item_for_name(name)
if item is not None:
return
@ -109,11 +109,27 @@ class Container(object):
item = manifest.makeelement('{%s}item'%OPF_NS, nsmap={'opf':OPF_NS},
href=self.name_to_href(name, posixpath.dirname(self.opf_name)),
id=self.generate_manifest_id())
mt = guess_type(posixpath.basename(name))[0]
if not mt:
mt = guess_type(posixpath.basename(name))[0]
if not mt:
mt = 'application/octest-stream'
item.set('media-type', mt)
manifest.append(item)
self.fix_tail(item)
def fix_tail(self, item):
'''
Designed only to work with self closing elements after item has
just been inserted/appended
'''
parent = item.getparent()
idx = parent.index(item)
if idx == 0:
item.tail = parent.text
else:
item.tail = parent[idx-1].tail
if idx == len(parent)-1:
parent[idx-1].tail = parent.text
def generate_manifest_id(self):
items = self.opf.xpath('//opf:manifest/opf:item[@id]',

View File

@ -413,6 +413,13 @@ class EPUBOutput(OutputFormatPlugin):
rule.style.removeProperty('margin-left')
# padding-left breaks rendering in webkit and gecko
rule.style.removeProperty('padding-left')
# Change whitespace:pre to pre-line to accommodate readers that
# cannot scroll horizontally
for rule in stylesheet.data.cssRules.rulesOfType(CSSRule.STYLE_RULE):
style = rule.style
ws = style.getPropertyValue('white-space')
if ws == 'pre':
style.setProperty('white-space', 'pre-wrap')
# }}}

View File

@ -20,7 +20,7 @@ from itertools import izip
from calibre.customize.conversion import InputFormatPlugin
from calibre.ebooks.chardet import xml_to_unicode
from calibre.customize.conversion import OptionRecommendation
from calibre.constants import islinux, isfreebsd, iswindows
from calibre.constants import islinux, isbsd, iswindows
from calibre import unicode_path, as_unicode
from calibre.utils.localization import get_lang
from calibre.utils.filenames import ascii_filename
@ -302,7 +302,7 @@ class HTMLInput(InputFormatPlugin):
if getattr(self, '_is_case_sensitive', None) is not None:
return self._is_case_sensitive
if not path or not os.path.exists(path):
return islinux or isfreebsd
return islinux or isbsd
self._is_case_sensitive = not (os.path.exists(path.lower()) \
and os.path.exists(path.upper()))
return self._is_case_sensitive

View File

@ -10,7 +10,7 @@ import os, sys, re
from urllib import unquote, quote
from urlparse import urlparse
from calibre import relpath, guess_type
from calibre import relpath, guess_type, remove_bracketed_text
from calibre.utils.config import tweaks
@ -27,20 +27,37 @@ def authors_to_string(authors):
else:
return ''
_bracket_pat = re.compile(r'[\[({].*?[})\]]')
def author_to_author_sort(author):
def author_to_author_sort(author, method=None):
if not author:
return ''
method = tweaks['author_sort_copy_method']
if method == 'copy' or (method == 'comma' and ',' in author):
return u''
sauthor = remove_bracketed_text(author).strip()
tokens = sauthor.split()
if len(tokens) < 2:
return author
author = _bracket_pat.sub('', author).strip()
tokens = author.split()
if tokens and tokens[-1] not in ('Inc.', 'Inc'):
tokens = tokens[-1:] + tokens[:-1]
if len(tokens) > 1 and method != 'nocomma':
tokens[0] += ','
return ' '.join(tokens)
if method is None:
method = tweaks['author_sort_copy_method']
if method == u'copy':
return author
suffixes = set([x.lower() for x in tweaks['author_name_suffixes']])
suffixes |= set([x+u'.' for x in suffixes])
last = tokens[-1].lower()
suffix = None
if last in suffixes:
suffix = tokens[-1]
tokens = tokens[:-1]
if method == u'comma' and u',' in u''.join(tokens):
return author
atokens = tokens[-1:] + tokens[:-1]
if suffix:
atokens.append(suffix)
if method != u'nocomma' and len(atokens) > 1:
atokens[0] += u','
return u' '.join(atokens)
def authors_to_sort_string(authors):
return ' & '.join(map(author_to_author_sort, authors))

View File

@ -41,27 +41,34 @@ field_metadata = FieldMetadata()
class SafeFormat(TemplateFormatter):
def get_value(self, key, args, kwargs):
def get_value(self, orig_key, args, kwargs):
if not orig_key:
return ''
orig_key = orig_key.lower()
key = orig_key
if key != 'title_sort' and key not in TOP_LEVEL_IDENTIFIERS:
key = field_metadata.search_term_to_field_key(key)
if key is None or (self.book and
key not in self.book.all_field_keys()):
if hasattr(self.book, orig_key):
key = orig_key
else:
raise ValueError(_('Value: unknown field ') + orig_key)
try:
key = key.lower()
if key != 'title_sort' and key not in TOP_LEVEL_IDENTIFIERS:
key = field_metadata.search_term_to_field_key(key)
b = self.book.get_user_metadata(key, False)
if b and b['datatype'] == 'int' and self.book.get(key, 0) == 0:
v = ''
elif b and b['datatype'] == 'float' and self.book.get(key, 0.0) == 0.0:
v = ''
else:
v = self.book.format_field(key, series_with_index=False)[1]
if v is None:
return ''
if v == '':
return ''
return v
except:
if DEBUG:
traceback.print_exc()
return key
b = None
if b and b['datatype'] == 'int' and self.book.get(key, 0) == 0:
v = ''
elif b and b['datatype'] == 'float' and self.book.get(key, 0.0) == 0.0:
v = ''
else:
v = self.book.format_field(key, series_with_index=False)[1]
if v is None:
return ''
if v == '':
return ''
return v
composite_formatter = SafeFormat()
@ -75,7 +82,7 @@ class Metadata(object):
Metadata from custom columns should be accessed via the get() method,
passing in the lookup name for the column, for example: "#mytags".
Use the :meth:`is_null` method to test if a filed is null.
Use the :meth:`is_null` method to test if a field is null.
This object also has functions to format fields into strings.
@ -106,7 +113,7 @@ class Metadata(object):
def is_null(self, field):
'''
Return True if the value of filed is null in this object.
Return True if the value of field is null in this object.
'null' means it is unknown or evaluates to False. So a title of
_('Unknown') is null or a language of 'und' is null.
@ -614,10 +621,7 @@ class Metadata(object):
orig_res = res
datatype = cmeta['datatype']
if datatype == 'text' and cmeta['is_multiple']:
if cmeta['display'].get('is_names', False):
res = u' & '.join(res)
else:
res = u', '.join(sorted(res, key=sort_key))
res = cmeta['is_multiple']['list_to_ui'].join(res)
elif datatype == 'series' and series_with_index:
if self.get_extra(key) is not None:
res = res + \
@ -628,6 +632,12 @@ class Metadata(object):
res = _('Yes') if res else _('No')
elif datatype == 'rating':
res = res/2.0
elif datatype in ['int', 'float']:
try:
fmt = cmeta['display'].get('number_format', None)
res = fmt.format(res)
except:
pass
return (name, unicode(res), orig_res, cmeta)
# convert top-level ids into their value
@ -655,7 +665,7 @@ class Metadata(object):
elif datatype == 'text' and fmeta['is_multiple']:
if isinstance(res, dict):
res = [k + ':' + v for k,v in res.items()]
res = u', '.join(sorted(res, key=sort_key))
res = fmeta['is_multiple']['list_to_ui'].join(sorted(res, key=sort_key))
elif datatype == 'series' and series_with_index:
res = res + ' [%s]'%self.format_series_index()
elif datatype == 'datetime':

View File

@ -5,8 +5,7 @@ Created on 4 Jun 2010
'''
from base64 import b64encode, b64decode
import json
import traceback
import json, traceback
from calibre.ebooks.metadata.book import SERIALIZABLE_FIELDS
from calibre.constants import filesystem_encoding, preferred_encoding
@ -69,6 +68,40 @@ def object_to_unicode(obj, enc=preferred_encoding):
return ans
return obj
def encode_is_multiple(fm):
if fm.get('is_multiple', None):
# migrate is_multiple back to a character
fm['is_multiple2'] = fm.get('is_multiple', {})
dt = fm.get('datatype', None)
if dt == 'composite':
fm['is_multiple'] = ','
else:
fm['is_multiple'] = '|'
else:
fm['is_multiple'] = None
fm['is_multiple2'] = {}
def decode_is_multiple(fm):
im = fm.get('is_multiple2', None)
if im:
fm['is_multiple'] = im
del fm['is_multiple2']
else:
# Must migrate the is_multiple from char to dict
im = fm.get('is_multiple', {})
if im:
dt = fm.get('datatype', None)
if dt == 'composite':
im = {'cache_to_list': ',', 'ui_to_list': ',',
'list_to_ui': ', '}
elif fm.get('display', {}).get('is_names', False):
im = {'cache_to_list': '|', 'ui_to_list': '&',
'list_to_ui': ', '}
else:
im = {'cache_to_list': '|', 'ui_to_list': ',',
'list_to_ui': ', '}
fm['is_multiple'] = im
class JsonCodec(object):
def __init__(self):
@ -93,9 +126,10 @@ class JsonCodec(object):
def encode_metadata_attr(self, book, key):
if key == 'user_metadata':
meta = book.get_all_user_metadata(make_copy=True)
for k in meta:
if meta[k]['datatype'] == 'datetime':
meta[k]['#value#'] = datetime_to_string(meta[k]['#value#'])
for fm in meta.itervalues():
if fm['datatype'] == 'datetime':
fm['#value#'] = datetime_to_string(fm['#value#'])
encode_is_multiple(fm)
return meta
if key in self.field_metadata:
datatype = self.field_metadata[key]['datatype']
@ -135,9 +169,10 @@ class JsonCodec(object):
if key == 'classifiers':
key = 'identifiers'
if key == 'user_metadata':
for k in value:
if value[k]['datatype'] == 'datetime':
value[k]['#value#'] = string_to_datetime(value[k]['#value#'])
for fm in value.itervalues():
if fm['datatype'] == 'datetime':
fm['#value#'] = string_to_datetime(fm['#value#'])
decode_is_multiple(fm)
return value
elif key in self.field_metadata:
if self.field_metadata[key]['datatype'] == 'datetime':

View File

@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en'
lxml based OPF parser.
'''
import re, sys, unittest, functools, os, uuid, glob, cStringIO, json
import re, sys, unittest, functools, os, uuid, glob, cStringIO, json, copy
from urllib import unquote
from urlparse import urlparse
@ -453,10 +453,13 @@ class TitleSortField(MetadataField):
def serialize_user_metadata(metadata_elem, all_user_metadata, tail='\n'+(' '*8)):
from calibre.utils.config import to_json
from calibre.ebooks.metadata.book.json_codec import object_to_unicode
from calibre.ebooks.metadata.book.json_codec import (object_to_unicode,
encode_is_multiple)
for name, fm in all_user_metadata.items():
try:
fm = copy.copy(fm)
encode_is_multiple(fm)
fm = object_to_unicode(fm)
fm = json.dumps(fm, default=to_json, ensure_ascii=False)
except:
@ -575,6 +578,7 @@ class OPF(object): # {{{
self._user_metadata_ = {}
temp = Metadata('x', ['x'])
from calibre.utils.config import from_json
from calibre.ebooks.metadata.book.json_codec import decode_is_multiple
elems = self.root.xpath('//*[name() = "meta" and starts-with(@name,'
'"calibre:user_metadata:") and @content]')
for elem in elems:
@ -585,6 +589,7 @@ class OPF(object): # {{{
fm = elem.get('content')
try:
fm = json.loads(fm, object_hook=from_json)
decode_is_multiple(fm)
temp.set_user_metadata(name, fm)
except:
prints('Failed to read user metadata:', name)

View File

@ -29,7 +29,7 @@ class Worker(Thread): # Get details {{{
Get book details from amazons book page in a separate thread
'''
def __init__(self, url, result_queue, browser, log, relevance, plugin, timeout=20):
def __init__(self, url, result_queue, browser, log, relevance, domain, plugin, timeout=20):
Thread.__init__(self)
self.daemon = True
self.url, self.result_queue = url, result_queue
@ -37,11 +37,12 @@ class Worker(Thread): # Get details {{{
self.relevance, self.plugin = relevance, plugin
self.browser = browser.clone_browser()
self.cover_url = self.amazon_id = self.isbn = None
self.domain = self.plugin.domain
self.domain = domain
months = {
'de': {
1 : ['jän'],
2 : ['februar'],
3 : ['märz'],
5 : ['mai'],
6 : ['juni'],
@ -199,7 +200,8 @@ class Worker(Thread): # Get details {{{
return
mi = Metadata(title, authors)
mi.set_identifier('amazon', asin)
idtype = 'amazon' if self.domain == 'com' else 'amazon_'+self.domain
mi.set_identifier(idtype, asin)
self.amazon_id = asin
try:
@ -404,12 +406,30 @@ class Amazon(Source):
'country\'s Amazon website.'), choices=AMAZON_DOMAINS),
)
def get_domain_and_asin(self, identifiers):
for key, val in identifiers.iteritems():
key = key.lower()
if key in ('amazon', 'asin'):
return 'com', val
if key.startswith('amazon_'):
domain = key.split('_')[-1]
if domain and domain in self.AMAZON_DOMAINS:
return domain, val
return None, None
def get_book_url(self, identifiers): # {{{
asin = identifiers.get('amazon', None)
if asin is None:
asin = identifiers.get('asin', None)
if asin:
return ('amazon', asin, 'http://amzn.com/%s'%asin)
domain, asin = self.get_domain_and_asin(identifiers)
if domain and asin:
url = None
if domain == 'com':
url = 'http://amzn.com/'+asin
elif domain == 'uk':
url = 'http://www.amazon.co.uk/dp/'+asin
else:
url = 'http://www.amazon.%s/dp/%s'%(domain, asin)
if url:
idtype = 'amazon' if self.domain == 'com' else 'amazon_'+self.domain
return (idtype, asin, url)
# }}}
@property
@ -420,8 +440,14 @@ class Amazon(Source):
return domain
def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
domain = self.domain
def create_query(self, log, title=None, authors=None, identifiers={}, # {{{
domain=None):
if domain is None:
domain = self.domain
idomain, asin = self.get_domain_and_asin(identifiers)
if idomain is not None:
domain = idomain
# See the amazon detailed search page to get all options
q = { 'search-alias' : 'aps',
@ -433,7 +459,6 @@ class Amazon(Source):
else:
q['sort'] = 'relevancerank'
asin = identifiers.get('amazon', None)
isbn = check_isbn(identifiers.get('isbn', None))
if asin is not None:
@ -456,23 +481,22 @@ class Amazon(Source):
if not ('field-keywords' in q or 'field-isbn' in q or
('field-title' in q)):
# Insufficient metadata to make an identify query
return None
return None, None
latin1q = dict([(x.encode('latin1', 'ignore'), y.encode('latin1',
'ignore')) for x, y in
q.iteritems()])
udomain = domain
if domain == 'uk':
domain = 'co.uk'
url = 'http://www.amazon.%s/s/?'%domain + urlencode(latin1q)
return url
udomain = 'co.uk'
url = 'http://www.amazon.%s/s/?'%udomain + urlencode(latin1q)
return url, domain
# }}}
def get_cached_cover_url(self, identifiers): # {{{
url = None
asin = identifiers.get('amazon', None)
if asin is None:
asin = identifiers.get('asin', None)
domain, asin = self.get_domain_and_asin(identifiers)
if asin is None:
isbn = identifiers.get('isbn', None)
if isbn is not None:
@ -489,7 +513,7 @@ class Amazon(Source):
Note this method will retry without identifiers automatically if no
match is found with identifiers.
'''
query = self.create_query(log, title=title, authors=authors,
query, domain = self.create_query(log, title=title, authors=authors,
identifiers=identifiers)
if query is None:
log.error('Insufficient metadata to construct query')
@ -549,7 +573,8 @@ class Amazon(Source):
r'//div[@id="Results"]/descendant::td[starts-with(@id, "search:Td:")]'):
for a in td.xpath(r'descendant::td[@class="dataColumn"]/descendant::a[@href]/span[@class="srTitle"]/..'):
title = tostring(a, method='text', encoding=unicode).lower()
if 'bulk pack' not in title:
if ('bulk pack' not in title and '[audiobook]' not in
title and '[audio cd]' not in title):
matches.append(a.get('href'))
break
@ -570,7 +595,7 @@ class Amazon(Source):
log.error('No matches found with query: %r'%query)
return
workers = [Worker(url, result_queue, br, log, i, self) for i, url in
workers = [Worker(url, result_queue, br, log, i, domain, self) for i, url in
enumerate(matches)]
for w in workers:

Some files were not shown because too many files have changed in this diff Show More