Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)

commit e32bdc0ce9: Sync to trunk.
@@ -1,5 +1,5 @@
 __license__ = 'GPL v3'
-__copyright__ = '2010, Dean Cording'
+__copyright__ = '2011, Pat Stapleton <pat.stapleton at gmail.com>'
 '''
 abc.net.au/news
 '''
@@ -8,7 +8,7 @@ from calibre.web.feeds.recipes import BasicNewsRecipe

 class ABCNews(BasicNewsRecipe):
     title = 'ABC News'
-    __author__ = 'Dean Cording'
+    __author__ = 'Pat Stapleton, Dean Cording'
     description = 'News from Australia'
     masthead_url = 'http://www.abc.net.au/news/assets/v5/images/common/logo-news.png'
     cover_url = 'http://www.abc.net.au/news/assets/v5/images/common/logo-news.png'
@@ -23,7 +23,9 @@ class ABCNews(BasicNewsRecipe):
     category = 'News, Australia, World'
     language = 'en_AU'
     publication_type = 'newsportal'
-    preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
+    # preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
+    # Remove annoying map links (inline-caption class is also used for some image captions! hence regex to match maps.google)
+    preprocess_regexps = [(re.compile(r'<a class="inline-caption" href="http://maps\.google\.com.*?/a>', re.DOTALL), lambda m: '')]
     conversion_options = {
         'comments' : description
         ,'tags'    : category
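The new map-link regex above can be sanity-checked outside calibre with a few lines of plain Python (an illustrative snippet, not part of the commit; the sample HTML is made up):

    import re

    # Same pattern the recipe now registers in preprocess_regexps.
    pattern = re.compile(r'<a class="inline-caption" href="http://maps\.google\.com.*?/a>', re.DOTALL)

    sample = '<p>Flood peak nears.</p><a class="inline-caption" href="http://maps.google.com/?q=-27.5,153">Map</a>'
    print(pattern.sub('', sample))  # prints: <p>Flood peak nears.</p>

The lazy `.*?/a>` is what keeps the match from swallowing everything up to the last `</a>` on the page.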
@@ -32,23 +34,23 @@ class ABCNews(BasicNewsRecipe):
         ,'linearize_tables': False
     }

-    keep_only_tags = dict(id='article')
+    keep_only_tags = [dict(attrs={'class':['article section']})]

-    remove_tags = [dict(attrs={'class':['related', 'tags']}),
-                   dict(id='statepromo')
-                  ]
+    remove_tags = [dict(attrs={'class':['related', 'tags', 'tools', 'attached-content ready',
+                   'inline-content story left', 'inline-content map left contracted', 'published',
+                   'story-map', 'statepromo', 'topics', ]})]

     remove_attributes = ['width','height']

     feeds = [
-        ('Top Stories', 'http://www.abc.net.au/news/syndicate/topstoriesrss.xml'),
-        ('Canberra', 'http://www.abc.net.au/news/indexes/idx-act/rss.xml'),
-        ('Sydney', 'http://www.abc.net.au/news/indexes/sydney/rss.xml'),
-        ('Melbourne', 'http://www.abc.net.au/news/indexes/melbourne/rss.xml'),
-        ('Brisbane', 'http://www.abc.net.au/news/indexes/brisbane/rss.xml'),
-        ('Perth', 'http://www.abc.net.au/news/indexes/perth/rss.xml'),
-        ('Australia', 'http://www.abc.net.au/news/indexes/idx-australia/rss.xml'),
-        ('World', 'http://www.abc.net.au/news/indexes/world/rss.xml'),
-        ('Business', 'http://www.abc.net.au/news/indexes/business/rss.xml'),
-        ('Science and Technology', 'http://www.abc.net.au/news/tag/science-and-technology/rss.xml'),
+        ('Top Stories', 'http://www.abc.net.au/news/feed/45910/rss.xml'),
+        ('Canberra', 'http://www.abc.net.au/news/feed/6910/rss.xml'),
+        ('Sydney', 'http://www.abc.net.au/news/feed/10232/rss.xml'),
+        ('Melbourne', 'http://www.abc.net.au/news/feed/21708/rss.xml'),
+        ('Brisbane', 'http://www.abc.net.au/news/feed/12858/rss.xml'),
+        ('Perth', 'feed://www.abc.net.au/news/feed/24886/rss.xml'),
+        ('Australia', 'http://www.abc.net.au/news/feed/46182/rss.xml'),
+        ('World', 'http://www.abc.net.au/news/feed/52278/rss.xml'),
+        ('Business', 'http://www.abc.net.au/news/feed/51892/rss.xml'),
+        ('Science and Technology', 'http://www.abc.net.au/news/feed/2298/rss.xml'),
     ]
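One oddity worth flagging in the hunk above: the Perth entry uses a `feed://` URL while every other feed is `http://`. If that scheme ever gives the downloader trouble, a normalisation along these lines would be the obvious fix (hypothetical helper, not in the commit):

    def normalize_feed_url(url):
        # 'feed://' is just an alias some readers use for HTTP; map it back
        # so ordinary HTTP clients can open it.
        if url.startswith('feed://'):
            return 'http://' + url[len('feed://'):]
        return url

    print(normalize_feed_url('feed://www.abc.net.au/news/feed/24886/rss.xml'))
    # -> http://www.abc.net.au/news/feed/24886/rss.xml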
@@ -1,61 +1,648 @@
-__license__ = 'GPL v3'
-__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+##
+## Title: BBC News, Sport, and Blog Calibre Recipe
+## Contact: mattst - jmstanfield@gmail.com
+##
+## License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
+## Copyright: mattst - jmstanfield@gmail.com
+##
+## Written: November 2011
+## Last Edited: 2011-11-19
+##
+
+__license__ = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html'
+__copyright__ = 'mattst - jmstanfield@gmail.com'
+
+
 '''
-news.bbc.co.uk
+BBC News, Sport, and Blog Calibre Recipe
 '''

+# Import the regular expressions module.
 import re

+# Import the BasicNewsRecipe class which this class extends.
 from calibre.web.feeds.recipes import BasicNewsRecipe

-class BBC(BasicNewsRecipe):
-    title = 'BBC News'
-    __author__ = 'Darko Miletic, Starson17'
-    description = 'News from UK. '
-    oldest_article = 2
-    max_articles_per_feed = 100
-    no_stylesheets = True
-    #delay = 1
-    use_embedded_content = False
-    encoding = 'utf8'
-    publisher = 'BBC'
-    category = 'news, UK, world'
-    language = 'en_GB'
-    publication_type = 'newsportal'
-    extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
-    preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
-    conversion_options = {
-        'comments'  : description
-        ,'tags'     : category
-        ,'language' : language
-        ,'publisher': publisher
-        ,'linearize_tables': True
+class BBCNewsSportBlog(BasicNewsRecipe):
+
+    #
+    # **** IMPORTANT USERS READ ME ****
+    #
+    # First select the feeds you want then scroll down below the feeds list
+    # and select the values you want for the other user preferences, like
+    # oldest_article and such like.
+    #
+    #
+    # Select the BBC rss feeds which you want in your ebook.
+    # Selected feeds have NO '#' at their start, de-selected feeds begin with a '#'.
+    #
+    # Eg.  ("News Home", "http://feeds.bbci.co.uk/... - include feed.
+    # Eg. #("News Home", "http://feeds.bbci.co.uk/... - do not include feed.
+    #
+    # There are 68 feeds below which constitute the bulk of the available rss
+    # feeds on the BBC web site. These include 5 blogs by editors and
+    # correspondents, 16 sports feeds, 15 'sub' regional feeds (Eg. North West
+    # Wales, Scotland Business), and 7 Welsh language feeds.
+    #
+    # Some of the feeds are low volume (Eg. blogs), or very low volume (Eg. Click)
+    # so if "oldest_article = 1.5" (only articles published in the last 36 hours)
+    # you may get some 'empty feeds' which will not then be included in the ebook.
+    #
+    # The 15 feeds currently selected below are simply my default ones.
+    #
+    # Note: With all 68 feeds selected, oldest_article set to 2,
+    # max_articles_per_feed set to 100, and simultaneous_downloads set to 10,
+    # the ebook creation took 29 minutes on my speedy 100 mbps net connection,
+    # fairly high-end desktop PC running Linux (Ubuntu Lucid-Lynx).
+    # More realistically with 15 feeds selected, oldest_article set to 1.5,
+    # max_articles_per_feed set to 100, and simultaneous_downloads set to 20,
+    # it took 6 minutes. If that's too slow increase 'simultaneous_downloads'.
+    #
+    # Select / de-select the feeds you want in your ebook.
+    #
+    feeds = [
+        ("News Home", "http://feeds.bbci.co.uk/news/rss.xml"),
+        ("UK", "http://feeds.bbci.co.uk/news/uk/rss.xml"),
+        ("World", "http://feeds.bbci.co.uk/news/world/rss.xml"),
+        #("England", "http://feeds.bbci.co.uk/news/england/rss.xml"),
+        #("Scotland", "http://feeds.bbci.co.uk/news/scotland/rss.xml"),
+        #("Wales", "http://feeds.bbci.co.uk/news/wales/rss.xml"),
+        #("N. Ireland", "http://feeds.bbci.co.uk/news/northern_ireland/rss.xml"),
+        #("Africa", "http://feeds.bbci.co.uk/news/world/africa/rss.xml"),
+        #("Asia", "http://feeds.bbci.co.uk/news/world/asia/rss.xml"),
+        #("Europe", "http://feeds.bbci.co.uk/news/world/europe/rss.xml"),
+        #("Latin America", "http://feeds.bbci.co.uk/news/world/latin_america/rss.xml"),
+        #("Middle East", "http://feeds.bbci.co.uk/news/world/middle_east/rss.xml"),
+        ("US & Canada", "http://feeds.bbci.co.uk/news/world/us_and_canada/rss.xml"),
+        ("Politics", "http://feeds.bbci.co.uk/news/politics/rss.xml"),
+        ("Science/Environment", "http://feeds.bbci.co.uk/news/science_and_environment/rss.xml"),
+        ("Technology", "http://feeds.bbci.co.uk/news/technology/rss.xml"),
+        ("Magazine", "http://feeds.bbci.co.uk/news/magazine/rss.xml"),
+        ("Entertainment/Arts", "http://feeds.bbci.co.uk/news/entertainment_and_arts/rss.xml"),
+        #("Health", "http://feeds.bbci.co.uk/news/health/rss.xml"),
+        #("Education/Family", "http://feeds.bbci.co.uk/news/education/rss.xml"),
+        ("Business", "http://feeds.bbci.co.uk/news/business/rss.xml"),
+        ("Special Reports", "http://feeds.bbci.co.uk/news/special_reports/rss.xml"),
+        ("Also in the News", "http://feeds.bbci.co.uk/news/also_in_the_news/rss.xml"),
+        #("Newsbeat", "http://www.bbc.co.uk/newsbeat/rss.xml"),
+        #("Click", "http://newsrss.bbc.co.uk/rss/newsonline_uk_edition/programmes/click_online/rss.xml"),
+        ("Blog: Nick Robinson (Political Editor)", "http://feeds.bbci.co.uk/news/correspondents/nickrobinson/rss.sxml"),
+        #("Blog: Mark D'Arcy (Parliamentary Correspondent)", "http://feeds.bbci.co.uk/news/correspondents/markdarcy/rss.sxml"),
+        #("Blog: Robert Peston (Business Editor)", "http://feeds.bbci.co.uk/news/correspondents/robertpeston/rss.sxml"),
+        #("Blog: Stephanie Flanders (Economics Editor)", "http://feeds.bbci.co.uk/news/correspondents/stephanieflanders/rss.sxml"),
+        ("Blog: Rory Cellan-Jones (Technology correspondent)", "http://feeds.bbci.co.uk/news/correspondents/rorycellanjones/rss.sxml"),
+        ("Sport Front Page", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/front_page/rss.xml"),
+        #("Football", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/football/rss.xml"),
+        #("Cricket", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/cricket/rss.xml"),
+        #("Rugby Union", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/rugby_union/rss.xml"),
+        #("Rugby League", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/rugby_league/rss.xml"),
+        #("Tennis", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/tennis/rss.xml"),
+        #("Golf", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/golf/rss.xml"),
+        #("Motorsport", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/motorsport/rss.xml"),
+        #("Boxing", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/boxing/rss.xml"),
+        #("Athletics", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/athletics/rss.xml"),
+        #("Snooker", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/snooker/rss.xml"),
+        #("Horse Racing", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/horse_racing/rss.xml"),
+        #("Cycling", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/cycling/rss.xml"),
+        #("Disability Sport", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/disability_sport/rss.xml"),
+        #("Other Sport", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/rss.xml"),
+        #("Olympics 2012", "http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/olympics_2012/rss.xml"),
+        #("N. Ireland Politics", "http://feeds.bbci.co.uk/news/northern_ireland/northern_ireland_politics/rss.xml"),
+        #("Scotland Politics", "http://feeds.bbci.co.uk/news/scotland/scotland_politics/rss.xml"),
+        #("Scotland Business", "http://feeds.bbci.co.uk/news/scotland/scotland_business/rss.xml"),
+        #("E. Scotland, Edinburgh & Fife", "http://feeds.bbci.co.uk/news/scotland/edinburgh_east_and_fife/rss.xml"),
+        #("W. Scotland & Glasgow", "http://feeds.bbci.co.uk/news/scotland/glasgow_and_west/rss.xml"),
+        #("Highlands & Islands", "http://feeds.bbci.co.uk/news/scotland/highlands_and_islands/rss.xml"),
+        #("NE. Scotland, Orkney & Shetland", "http://feeds.bbci.co.uk/news/scotland/north_east_orkney_and_shetland/rss.xml"),
+        #("South Scotland", "http://feeds.bbci.co.uk/news/scotland/south_scotland/rss.xml"),
+        #("Central Scotland & Tayside", "http://feeds.bbci.co.uk/news/scotland/tayside_and_central/rss.xml"),
+        #("Wales Politics", "http://feeds.bbci.co.uk/news/wales/wales_politics/rss.xml"),
+        #("NW. Wales", "http://feeds.bbci.co.uk/news/wales/north_west_wales/rss.xml"),
+        #("NE. Wales", "http://feeds.bbci.co.uk/news/wales/north_east_wales/rss.xml"),
+        #("Mid. Wales", "http://feeds.bbci.co.uk/news/wales/mid_wales/rss.xml"),
+        #("SW. Wales", "http://feeds.bbci.co.uk/news/wales/south_west_wales/rss.xml"),
+        #("SE. Wales", "http://feeds.bbci.co.uk/news/wales/south_east_wales/rss.xml"),
+        #("Newyddion - News in Welsh", "http://feeds.bbci.co.uk/newyddion/rss.xml"),
+        #("Gwleidyddiaeth", "http://feeds.bbci.co.uk/newyddion/gwleidyddiaeth/rss.xml"),
+        #("Gogledd-Ddwyrain", "http://feeds.bbci.co.uk/newyddion/gogledd-ddwyrain/rss.xml"),
+        #("Gogledd-Orllewin", "http://feeds.bbci.co.uk/newyddion/gogledd-orllewin/rss.xml"),
+        #("Canolbarth", "http://feeds.bbci.co.uk/newyddion/canolbarth/rss.xml"),
+        #("De-Ddwyrain", "http://feeds.bbci.co.uk/newyddion/de-ddwyrain/rss.xml"),
+        #("De-Orllewin", "http://feeds.bbci.co.uk/newyddion/de-orllewin/rss.xml"),
+    ]
+
+
+    # **** SELECT YOUR USER PREFERENCES ****
+
+    # Title to use for the ebook.
+    #
+    title = 'BBC News'
+
+    # A brief description for the ebook.
+    #
+    description = u'BBC web site ebook created using rss feeds.'
+
+    # The max number of articles which may be downloaded from each feed.
+    # I've never seen more than about 70 articles in a single feed in the
+    # BBC feeds.
+    #
+    max_articles_per_feed = 100
+
+    # The max age of articles which may be downloaded from each feed. This is
+    # specified in days - note fractions of days are allowed, Eg. 2.5 (2 and a
+    # half days). My default of 1.5 days is the last 36 hours, the point at
+    # which I've decided 'news' becomes 'old news', but be warned this is not
+    # so good for the blogs, technology, magazine, etc., and sports feeds.
+    # You may wish to extend this to 2-5 but watch out ebook creation time will
+    # increase as well. Setting this to 30 will get everything (AFAICT) as long
+    # as max_articles_per_feed remains set high (except for 'Click' which is
+    # v. low volume and its currently oldest article is 4th Feb 2011).
+    #
+    oldest_article = 1.5
+
+    # Number of simultaneous downloads. 20 is consistently working fine on the
+    # BBC News feeds with no problems. Speeds things up from the default of 5.
+    # If you have a lot of feeds and/or have increased oldest_article above 2
+    # then you may wish to try increasing simultaneous_downloads to 25-30,
+    # Or, of course, if you are in a hurry. [I've not tried beyond 20.]
+    #
+    simultaneous_downloads = 20
+
+    # Timeout for fetching files from the server in seconds. The default of
+    # 120 seconds, seems somewhat excessive.
+    #
+    timeout = 30
+
+    # The format string for the date shown on the ebook's first page.
+    # List of all values: http://docs.python.org/library/time.html
+    # Default in news.py has a leading space so that's mirrored here.
+    # As with 'feeds' select/de-select by adding/removing the initial '#',
+    # only one timefmt should be selected, here's a few to choose from.
+    #
+    timefmt = ' [%a, %d %b %Y]'            # [Fri, 14 Nov 2011] (Calibre default)
+    #timefmt = ' [%a, %d %b %Y %H:%M]'     # [Fri, 14 Nov 2011 18:30]
+    #timefmt = ' [%a, %d %b %Y %I:%M %p]'  # [Fri, 14 Nov 2011 06:30 PM]
+    #timefmt = ' [%d %b %Y]'               # [14 Nov 2011]
+    #timefmt = ' [%d %b %Y %H:%M]'         # [14 Nov 2011 18:30]
+    #timefmt = ' [%Y-%m-%d]'               # [2011-11-14]
+    #timefmt = ' [%Y-%m-%d-%H-%M]'         # [2011-11-14-18-30]
+
+
+
+    #
+    # **** IMPORTANT ****
+    #
+    # DO NOT EDIT BELOW HERE UNLESS YOU KNOW WHAT YOU ARE DOING.
+    #
+    # DO NOT EDIT BELOW HERE UNLESS YOU KNOW WHAT YOU ARE DOING.
+    #
+    # I MEAN IT, YES I DO, ABSOLUTELY, AT YOUR OWN RISK. :)
+    #
+    # **** IMPORTANT ****
+    #
+
+
+    # Author of this recipe.
+    __author__ = 'mattst'
+
+    # Specify English as the language of the RSS feeds (ISO-639 code).
+    language = 'en_GB'
+
+    # Set tags.
+    tags = 'news, sport, blog'
+
+    # Set publisher and publication type.
+    publisher = 'BBC'
+    publication_type = 'newspaper'
+
+    # Disable stylesheets from site.
+    no_stylesheets = True
+
+    # Specifies an override encoding for sites that have an incorrect charset
+    # specified. Default of 'None' says to auto-detect. Some other BBC recipes
+    # use 'utf8', which works fine (so use that if necessary) but auto-detecting
+    # with None is working fine, so stick with that for robustness.
+    encoding = None
+
+    # Sets whether a feed has full articles embedded in it. The BBC feeds do not.
+    use_embedded_content = False
+
+    # Removes empty feeds - why keep them!?
+    remove_empty_feeds = True
+
+    # Create a custom title which fits nicely in the Kindle title list.
+    # Requires "import time" above class declaration, and replacing
+    # title with custom_title in conversion_options (right column only).
+    # Example of string below: "BBC News - 14 Nov 2011"
+    #
+    # custom_title = "BBC News - " + time.strftime('%d %b %Y')
+
+    '''
+    # Conversion options for advanced users, but don't forget to comment out the
+    # current conversion_options below. Avoid setting 'linearize_tables' as that
+    # plays havoc with the 'old style' table based pages.
+    #
+    conversion_options = { 'title'       : title,
+        'comments'    : description,
+        'tags'        : tags,
+        'language'    : language,
+        'publisher'   : publisher,
+        'authors'     : publisher,
+        'smarten_punctuation' : True
     }
-    keep_only_tags = [
-        dict(name='div', attrs={'class':['layout-block-a layout-block']})
-        ,dict(attrs={'class':['story-body','storybody']})
-    ]
-
-    remove_tags = [
-        dict(name='div', attrs={'class':['story-feature related narrow', 'share-help', 'embedded-hyper',
-            'story-feature wide ', 'story-feature narrow']}),
-        dict(id=['hypertab', 'comment-form']),
-    ]
-
-    remove_attributes = ['width','height']
-
-    feeds = [
-        ('News Front Page', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/front_page/rss.xml'),
-        ('Science/Nature', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/science/nature/rss.xml'),
-        ('Technology', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/technology/rss.xml'),
-        ('Entertainment', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/entertainment/rss.xml'),
-        ('Magazine', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/uk_news/magazine/rss.xml'),
-        ('Business', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/business/rss.xml'),
-        ('Health', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/health/rss.xml'),
-        ('Americas', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/americas/rss.xml'),
-        ('Europe', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/europe/rss.xml'),
-        ('South Asia', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/south_asia/rss.xml'),
-        ('UK', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/uk_news/rss.xml'),
-        ('Asia-Pacific', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/asia-pacific/rss.xml'),
-        ('Africa', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/africa/rss.xml'),
-    ]
+    '''
+
+    conversion_options = { 'smarten_punctuation' : True }
+
+    # Specify extra CSS - overrides ALL other CSS (IE. Added last).
+    extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \
+                 .introduction, .first { font-weight: bold; } \
+                 .cross-head { font-weight: bold; font-size: 125%; } \
+                 .cap, .caption { display: block; font-size: 80%; font-style: italic; } \
+                 .cap, .caption, .caption img, .caption span { display: block; text-align: center; margin: 5px auto; } \
+                 .byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position, \
+                    .correspondent-portrait img, .byline-lead-in, .name, .bbc-role { display: block; \
+                    text-align: center; font-size: 80%; font-style: italic; margin: 1px auto; } \
+                 .story-date, .published { font-size: 80%; } \
+                 table { width: 100%; } \
+                 td img { display: block; margin: 5px auto; } \
+                 ul { padding-top: 10px; } \
+                 ol { padding-top: 10px; } \
+                 li { padding-top: 5px; padding-bottom: 5px; } \
+                 h1 { text-align: center; font-size: 175%; font-weight: bold; } \
+                 h2 { text-align: center; font-size: 150%; font-weight: bold; } \
+                 h3 { text-align: center; font-size: 125%; font-weight: bold; } \
+                 h4, h5, h6 { text-align: center; font-size: 100%; font-weight: bold; }'
+
+    # Remove various tag attributes to improve the look of the ebook pages.
+    remove_attributes = [ 'border', 'cellspacing', 'align', 'cellpadding', 'colspan',
+                          'valign', 'vspace', 'hspace', 'alt', 'width', 'height' ]
+
+    # Remove the (admittedly rarely used) line breaks, "<br />", which sometimes
+    # cause a section of the ebook to start in an unsightly fashion or, more
+    # frequently, a "<br />" will muck up the formatting of a correspondent's byline.
+    # "<br />" and "<br clear/>" are far more frequently used on the table formatted
+    # style of pages, and really spoil the look of the ebook pages.
+    preprocess_regexps = [(re.compile(r'<br[ ]*/>', re.IGNORECASE), lambda m: ''),
+                          (re.compile(r'<br[ ]*clear.*/>', re.IGNORECASE), lambda m: '')]
+
+
+    # Create regular expressions for tag keeping and removal to make the matches more
+    # robust against minor changes and errors in the HTML, Eg. double spaces, leading
+    # and trailing spaces, missing hyphens, and such like.
+    # Python regular expression ('re' class) page: http://docs.python.org/library/re.html
+
+    # ***************************************
+    # Regular expressions for keep_only_tags:
+    # ***************************************
+
+    # The BBC News HTML pages use variants of 'storybody' to denote the section of a HTML
+    # page which contains the main text of the article. Match storybody variants: 'storybody',
+    # 'story-body', 'story body','storybody ', etc.
+    storybody_reg_exp = '^.*story[_ -]*body.*$'
+
+    # The BBC sport and 'newsbeat' (features) HTML pages use 'blq_content' to hold the title
+    # and published date. This is one level above the usual news pages which have the title
+    # and date within 'story-body'. This is annoying since 'blq_content' must also be kept,
+    # resulting in a lot of extra things to be removed by remove_tags.
+    blq_content_reg_exp = '^.*blq[_ -]*content.*$'
+
+    # The BBC has an alternative page design structure, which I suspect is an out-of-date
+    # design but which is still used in some articles, Eg. 'Click' (technology), 'FastTrack'
+    # (travel), and in some sport pages. These alternative pages are table based (which is
+    # why I think they are an out-of-date design) and account for - I'm guesstimating - less
+    # than 1% of all articles. They use a table class 'storycontent' to hold the article
+    # and like blq_content (above) have required lots of extra removal by remove_tags.
+    story_content_reg_exp = '^.*story[_ -]*content.*$'
+
+    # Keep the sections of the HTML which match the list below. The HTML page created by
+    # Calibre will fill <body> with those sections which are matched. Note that the
+    # blq_content_reg_exp must be listed before storybody_reg_exp in keep_only_tags due to
+    # it being the parent of storybody_reg_exp, that is to say the div class/id 'story-body'
+    # will be inside div class/id 'blq_content' in the HTML (if 'blq_content' is there at
+    # all). If they are the other way around in keep_only_tags then blq_content_reg_exp
+    # will end up being discarded.
+    keep_only_tags = [ dict(name='table', attrs={'class':re.compile(story_content_reg_exp, re.IGNORECASE)}),
+                       dict(name='div',   attrs={'class':re.compile(blq_content_reg_exp, re.IGNORECASE)}),
+                       dict(name='div',   attrs={'id':re.compile(blq_content_reg_exp, re.IGNORECASE)}),
+                       dict(name='div',   attrs={'class':re.compile(storybody_reg_exp, re.IGNORECASE)}),
+                       dict(name='div',   attrs={'id':re.compile(storybody_reg_exp, re.IGNORECASE)}) ]
+
+    # ************************************
+    # Regular expressions for remove_tags:
+    # ************************************
+
+    # Regular expression to remove share-help and variant tags. The share-help class
+    # is used by the site for a variety of 'sharing' type links, Eg. Facebook, delicious,
+    # twitter, email. Removed to avoid page clutter.
+    share_help_reg_exp = '^.*share[_ -]*help.*$'
+
+    # Regular expression to remove embedded-hyper and variant tags. This class is used to
+    # display links to other BBC News articles on the same/similar subject.
+    embedded_hyper_reg_exp = '^.*embed*ed[_ -]*hyper.*$'
+
+    # Regular expression to remove hypertabs and variant tags. This class is used to
+    # display a tab bar at the top of an article which allows the user to switch to
+    # an article (viewed on the same page) providing further info., 'in depth' analysis,
+    # an editorial, a correspondent's blog entry, and such like. The ability to handle
+    # a tab bar of this nature is currently beyond the scope of this recipe and
+    # possibly of Calibre itself (not sure about that - TO DO - check!).
+    hypertabs_reg_exp = '^.*hyper[_ -]*tabs.*$'
+
+    # Regular expression to remove story-feature and variant tags. Eg. 'story-feature',
+    # 'story-feature related narrow', 'story-feature wide', 'story-feature narrow'.
+    # This class is used to add additional info. boxes, or small lists, outside of
+    # the main story. TO DO: Work out a way to incorporate these neatly.
+    story_feature_reg_exp = '^.*story[_ -]*feature.*$'
+
+    # Regular expression to remove video and variant tags, Eg. 'videoInStoryB',
+    # 'videoInStoryC'. This class is used to embed video.
+    video_reg_exp = '^.*video.*$'
+
+    # Regular expression to remove audio and variant tags, Eg. 'audioInStoryD'.
+    # This class is used to embed audio.
+    audio_reg_exp = '^.*audio.*$'
+
+    # Regular expression to remove pictureGallery and variant tags, Eg. 'pictureGallery'.
+    # This class is used to embed a photo slideshow. See also 'slideshow' below.
+    picture_gallery_reg_exp = '^.*picture.*$'
+
+    # Regular expression to remove slideshow and variant tags, Eg. 'dslideshow-enclosure'.
+    # This class is used to embed a slideshow (not necessarily photo) but both
+    # 'slideshow' and 'pictureGallery' are used for slideshows.
+    slideshow_reg_exp = '^.*slide[_ -]*show.*$'
+
+    # Regular expression to remove social-links and variant tags. This class is used to
+    # display links to a BBC bloggers main page, used in various columnists' blogs
+    # (Eg. Nick Robinson, Robert Peston).
+    social_links_reg_exp = '^.*social[_ -]*links.*$'
+
+    # Regular expression to remove quote and (multi) variant tags, Eg. 'quote',
+    # 'endquote', 'quote-credit', 'quote-credit-title', etc. These are usually
+    # removed by 'story-feature' removal (as they are usually within them), but
+    # not always. The quotation removed is always (AFAICT) in the article text
+    # as well but a 2nd copy is placed in a quote tag to draw attention to it.
+    # The quote class tags may or may not appear in div's.
+    quote_reg_exp = '^.*quote.*$'
+
+    # Regular expression to remove hidden and variant tags, Eg. 'hidden'.
+    # The purpose of these is unclear, they seem to be an internal link to a
+    # section within the article, but the text of the link (Eg. 'Continue reading
+    # the main story') never seems to be displayed anyway. Removed to avoid clutter.
+    # The hidden class tags may or may not appear in div's.
+    hidden_reg_exp = '^.*hidden.*$'
+
+    # Regular expression to remove comment and variant tags, Eg. 'comment-introduction'.
+    # Used on the site to display text about registered users entering comments.
+    comment_reg_exp = '^.*comment.*$'
+
+    # Regular expression to remove form and variant tags, Eg. 'comment-form'.
+    # Used on the site to allow registered BBC users to fill in forms, typically
+    # for entering comments about an article.
+    form_reg_exp = '^.*form.*$'
+
+    # Extra things to remove due to the addition of 'blq_content' in keep_only_tags.
+
+    #<div class="story-actions"> Used on sports pages for 'email' and 'print'.
+    story_actions_reg_exp = '^.*story[_ -]*actions.*$'
+
+    #<div class="bookmark-list"> Used on sports pages instead of 'share-help' (for
+    # social networking links).
+    bookmark_list_reg_exp = '^.*bookmark[_ -]*list.*$'
+
+    #<div id="secondary-content" class="content-group">
+    # NOTE: Don't remove class="content-group" that is needed.
+    # Used on sports pages to link to 'similar stories'.
+    secondary_content_reg_exp = '^.*secondary[_ -]*content.*$'
+
+    #<div id="featured-content" class="content-group">
+    # NOTE: Don't remove class="content-group" that is needed.
+    # Used on sports pages to link to pages like 'tables', 'fixtures', etc.
+    featured_content_reg_exp = '^.*featured[_ -]*content.*$'
+
+    #<div id="navigation">
+    # Used on sports pages to link to pages like 'tables', 'fixtures', etc.
+    # Used sometimes instead of "featured-content" above.
+    navigation_reg_exp = '^.*navigation.*$'
+
+    #<a class="skip" href="#blq-container-inner">Skip to top</a>
+    # Used on sports pages to link to the top of the page.
+    skip_reg_exp = '^.*skip.*$'
+
+    # Extra things to remove due to the addition of 'storycontent' in keep_only_tags,
+    # which are the alternative table design based pages. The purpose of some of these
+    # is not entirely clear from the pages (which are a total mess!).
+
+    # Remove mapping based tags, Eg. <map id="world_map">
+    # The dynamic maps don't seem to work during ebook creation. TO DO: Investigate.
+    map_reg_exp = '^.*map.*$'
+
+    # Remove social bookmarking variation, called 'socialBookMarks'.
+    social_bookmarks_reg_exp = '^.*social[_ -]*bookmarks.*$'
+
+    # Remove page navigation tools, like 'search', 'email', 'print', called 'blq-mast'.
+    blq_mast_reg_exp = '^.*blq[_ -]*mast.*$'
+
+    # Remove 'sharesb', I think this is a generic 'sharing' class. It seems to appear
+    # alongside 'socialBookMarks' whenever that appears. I am removing it as well
+    # under the assumption that it can appear alone as well.
+    sharesb_reg_exp = '^.*sharesb.*$'
+
+    # Remove class 'o'. The worst named user created css class of all time. The creator
+    # should immediately be fired. I've seen it used to hold nothing at all but with
+    # 20 or so empty lines in it. Also to hold a single link to another article.
+    # Whatever it was designed to do it is not wanted by this recipe. Exact match only.
+    o_reg_exp = '^o$'
+
+    # Remove 'promotopbg' and 'promobottombg', link lists. Have decided to
+    # use two reg expressions to make removing this (and variants) robust.
+    promo_top_reg_exp = '^.*promotopbg.*$'
+    promo_bottom_reg_exp = '^.*promobottombg.*$'
+
+    # Remove 'nlp', provides heading for link lists. Requires an exact match due to
+    # risk of matching those letters in something needed, unless I see a variation
+    # of 'nlp' used at a later date.
+    nlp_reg_exp = '^nlp$'
+
+    # Remove 'mva', provides embedded floating content of various types. Variant 'mvb'
+    # has also now been seen. Requires an exact match of 'mva' or 'mvb' due to risk of
+    # matching those letters in something needed.
+    mva_or_mvb_reg_exp = '^mv[ab]$'
+
+    # Remove 'mvtb', seems to be page navigation tools, like 'blq-mast'.
+    mvtb_reg_exp = '^mvtb$'
+
+    # Remove 'blq-toplink', class to provide a link to the top of the page.
+    blq_toplink_reg_exp = '^.*blq[_ -]*top[_ -]*link.*$'
+
+    # Remove 'products and services' links, Eg. desktop tools, alerts, and so on.
+    # Eg. Class="servicev4 ukfs_services" - what a mess of a name. Have decided to
+    # use two reg expressions to make removing this (and variants) robust.
+    prods_services_01_reg_exp = '^.*servicev4.*$'
+    prods_services_02_reg_exp = '^.*ukfs[_ -]*services.*$'
+
+    # Remove - what I think is - some kind of navigation tools helper class, though I am
+    # not sure, it's called: 'blq-rst blq-new-nav'. What I do know is it pops up
+    # frequently and it is not wanted. Have decided to use two reg expressions to make
+    # removing this (and variants) robust.
+    blq_misc_01_reg_exp = '^.*blq[_ -]*rst.*$'
+    blq_misc_02_reg_exp = '^.*blq[_ -]*new[_ -]*nav.*$'
+
+    # Remove 'puffbox' - this may only appear inside 'storyextra', so it may not
+    # need removing - I have no clue what it does other than it contains links.
+    # Whatever it is - it is not part of the article and is not wanted.
+    puffbox_reg_exp = '^.*puffbox.*$'
+
+    # Remove 'sibtbg' and 'sibtbgf' - some kind of table formatting classes.
+    sibtbg_reg_exp = '^.*sibtbg.*$'
+
+    # Remove 'storyextra' - links to relevant articles and external sites.
+    storyextra_reg_exp = '^.*story[_ -]*extra.*$'
+
+    remove_tags = [ dict(name='div', attrs={'class':re.compile(story_feature_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(share_help_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(embedded_hyper_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(hypertabs_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(video_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(audio_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(picture_gallery_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(slideshow_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(quote_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(hidden_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(comment_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(story_actions_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(bookmark_list_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'id':re.compile(secondary_content_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'id':re.compile(featured_content_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'id':re.compile(navigation_reg_exp, re.IGNORECASE)}),
+                    dict(name='form', attrs={'id':re.compile(form_reg_exp, re.IGNORECASE)}),
+                    dict(attrs={'class':re.compile(quote_reg_exp, re.IGNORECASE)}),
+                    dict(attrs={'class':re.compile(hidden_reg_exp, re.IGNORECASE)}),
+                    dict(attrs={'class':re.compile(social_links_reg_exp, re.IGNORECASE)}),
+                    dict(attrs={'class':re.compile(comment_reg_exp, re.IGNORECASE)}),
+                    dict(attrs={'class':re.compile(skip_reg_exp, re.IGNORECASE)}),
+                    dict(name='map', attrs={'id':re.compile(map_reg_exp, re.IGNORECASE)}),
+                    dict(name='map', attrs={'name':re.compile(map_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'id':re.compile(social_bookmarks_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'id':re.compile(blq_mast_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(sharesb_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(o_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(promo_top_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(promo_bottom_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(nlp_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(mva_or_mvb_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(mvtb_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(blq_toplink_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(prods_services_01_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(prods_services_02_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(blq_misc_01_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(blq_misc_02_reg_exp, re.IGNORECASE)}),
+                    dict(name='div', attrs={'class':re.compile(puffbox_reg_exp, re.IGNORECASE)}),
+                    dict(attrs={'class':re.compile(sibtbg_reg_exp, re.IGNORECASE)}),
+                    dict(attrs={'class':re.compile(storyextra_reg_exp, re.IGNORECASE)})
+                  ]
+
+    # Uses url to create and return the 'printer friendly' version of the url.
+    # In other words the 'print this page' address of the page.
+    #
+    # There are 3 types of urls used in the BBC site's rss feeds. There is just
+    # 1 type for the standard news while there are 2 used for sports feed urls.
+    # Note: Sports urls are linked from regular news feeds (Eg. 'News Home') when
+    # there is a major story of interest to 'everyone'. So even if no BBC sports
+    # feeds are added to 'feeds' the logic of this method is still needed to avoid
+    # blank / missing / empty articles which have an index title and then no body.
+    def print_version(self, url):
+
+        # Handle sports page urls type 01:
+        if (url.find("go/rss/-/sport1/") != -1):
+            temp_url = url.replace("go/rss/-/", "")
+
+        # Handle sports page urls type 02:
+        elif (url.find("go/rss/int/news/-/sport1/") != -1):
+            temp_url = url.replace("go/rss/int/news/-/", "")
+
+        # Handle regular news page urls:
+        else:
+            temp_url = url.replace("go/rss/int/news/-/", "")
+
+        # Always add "?print=true" to the end of the url.
+        print_url = temp_url + "?print=true"
+
+        return print_url
+
+
+    # Remove articles in feeds based on a string in the article title or url.
+    #
+    # Code logic written by: Starson17 - posted in: "Recipes - Re-usable code"
+    # thread, in post with title: "Remove articles from feed", see url:
+    # http://www.mobileread.com/forums/showpost.php?p=1165462&postcount=6
+    # Many thanks and all credit to Starson17.
+    #
+    # Starson17's code has obviously been altered to suit my requirements.
+    def parse_feeds(self):
+
+        # Call parent's method.
+        feeds = BasicNewsRecipe.parse_feeds(self)
+
+        # Loop through all feeds.
+        for feed in feeds:
+
+            # Loop through all articles in feed.
+            for article in feed.articles[:]:
+
+                # Match key words and remove article if there's a match.
+
+                # Most BBC rss feed video only 'articles' use upper case 'VIDEO'
+                # as a title prefix. Just match upper case 'VIDEO', so that
+                # articles like 'Video game banned' won't be matched and removed.
+                if 'VIDEO' in article.title:
+                    feed.articles.remove(article)
+
+                # Most BBC rss feed audio only 'articles' use upper case 'AUDIO'
+                # as a title prefix. Just match upper case 'AUDIO', so that
+                # articles like 'Hi-Def audio...' won't be matched and removed.
+                elif 'AUDIO' in article.title:
+                    feed.articles.remove(article)
+
+                # Most BBC rss feed photo slideshow 'articles' use 'In Pictures',
+                # 'In pictures', and 'in pictures', somewhere in their title.
+                # Match any case of that phrase.
+                elif 'IN PICTURES' in article.title.upper():
+                    feed.articles.remove(article)
+
+                # As above, but user contributed pictures. Match any case.
+                elif 'YOUR PICTURES' in article.title.upper():
+                    feed.articles.remove(article)
+
+                # 'Sportsday Live' are articles which contain a constantly and
+                # dynamically updated 'running commentary' during a live sporting
+                # event. Match any case.
+                elif 'SPORTSDAY LIVE' in article.title.upper():
+                    feed.articles.remove(article)
+
+                # Sometimes 'Sportsday Live' (above) becomes 'Live - Sport Name'.
+                # These are being matched below using 'Live - ' because removing all
+                # articles with 'live' in their titles would remove some articles
+                # that are in fact not live sports pages. Match any case.
+                elif 'LIVE - ' in article.title.upper():
+                    feed.articles.remove(article)
+
+                # 'Quiz of the week' is a Flash player weekly news quiz. Match only
+                # the 'Quiz of the' part in anticipation of monthly and yearly
+                # variants. Match any case.
+                elif 'QUIZ OF THE' in article.title.upper():
+                    feed.articles.remove(article)
+
+                # Remove articles with 'scorecards' in the url. These are BBC sports
+                # pages which just display a cricket scorecard. The pages have a mass
+                # of table and css entries to display the scorecards nicely. Probably
+                # could make them work with this recipe, but might take a whole day
+                # of work to sort out all the css - basically a formatting nightmare.
+                elif 'scorecards' in article.url:
+                    feed.articles.remove(article)
+
+        return feeds
+
+# End of class and file.
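A detail that is easy to miss in parse_feeds above: the inner loop iterates over the copy `feed.articles[:]` precisely so that `feed.articles.remove(article)` is safe while walking the list. The filtering idea itself reduces to a few lines of plain Python (illustrative sketch with made-up titles; in the recipe the same tests run against calibre's article objects):

    titles = ['VIDEO: Flood hits town', 'Video game banned',
              'In Pictures: Storm damage', 'Economy shrinks']

    def wanted(title):
        if 'VIDEO' in title:                # deliberately case-sensitive
            return False
        if 'IN PICTURES' in title.upper():  # deliberately case-insensitive
            return False
        return True

    print([t for t in titles if wanted(t)])
    # -> ['Video game banned', 'Economy shrinks']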
@@ -1,35 +1,43 @@
 #!/usr/bin/env python
 __license__ = 'GPL v3'
-__author__ = 'Gerardo Diez'
-__copyright__ = 'Gerardo Diez<gerardo.diez.garcia@gmail.com>'
-description = 'Main daily newspaper from Spain - v1.00 (05, Enero 2011)'
-__docformat__ = 'restructuredtext en'
+__copyright__ = '5, January 2011 Gerardo Diez<gerardo.diez.garcia@gmail.com> & desUBIKado'
+__author__ = 'desUBIKado, based on an earlier version by Gerardo Diez'
+__version__ = 'v1.01'
+__date__ = '13, November 2011'

 '''
-expansion.es
+http://www.expansion.com/
 '''

+import time
+import re
 from calibre.web.feeds.recipes import BasicNewsRecipe

-class Publico(BasicNewsRecipe):
-    title = u'Expansion.com'
-    __author__ = 'Gerardo Diez'
-    publisher = u'Unidad Editorial Información Económica, S.L.'
-    category = 'finances, catalunya'
-    oldest_article = 1
+class expansion_spanish(BasicNewsRecipe):
+    __author__ = 'Gerardo Diez & desUBIKado'
+    description = 'Financial news from Spain'
+    title = u'Expansion'
+    publisher = u'Unidad Editorial Internet, S.L.'
+    category = 'news, finances, Spain'
+    oldest_article = 2
+    simultaneous_downloads = 10
     max_articles_per_feed = 100
-    simultaneous_downloads = 10
-    cover_url = u'http://estaticos01.expansion.com/iconos/v2.x/v2.0/cabeceras/logo_expansion.png'
-    timefmt = '[%A, %d %B, %Y]'
-    encoding = 'latin'
+    timefmt = '[%a, %d %b, %Y]'
+    encoding = 'iso-8859-15'
     language = 'es'
-    remove_javascript = True
-    no_stylesheets = True
+    use_embedded_content = False
+    remove_javascript = True
+    no_stylesheets = True
+    remove_empty_feeds = True

     keep_only_tags = dict(name='div', attrs={'class':['noticia primer_elemento']})

     remove_tags = [
-        dict(name='div', attrs={'class':['compartir', 'metadata_desarrollo_noticia', 'relacionadas', 'mas_info','publicidad publicidad_textlink', 'ampliarfoto']}),
-        dict(name='ul', attrs={'class':['bolos_desarrollo_noticia']}),
+        dict(name='div', attrs={'class':['compartir', 'metadata_desarrollo_noticia', 'relacionadas', 'mas_info','publicidad publicidad_textlink', 'ampliarfoto','tit_relacionadas','interact','paginacion estirar','sumario derecha']}),
+        dict(name='ul', attrs={'class':['bolos_desarrollo_noticia','not_logged']}),
         dict(name='span', attrs={'class':['comentarios']}),
         dict(name='p', attrs={'class':['cintillo_comentarios', 'cintillo_comentarios formulario']}),
-        dict(name='div', attrs={'id':['comentarios_lectores_listado']})
+        dict(name='div', attrs={'id':['comentarios_lectores_listado','comentar']})
     ]
     feeds = [
         (u'Portada', u'http://estaticos.expansion.com/rss/portada.xml'),
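The bulk of the next hunk rewrites the accented feed titles with unicode escapes; the strings themselves are unchanged, only their spelling in the source is, which keeps the recipe file pure ASCII. A one-line check makes this concrete:

    print(u'Cr\xe9ditos' == u'Créditos')  # True: same string, two spellings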
@@ -38,42 +46,112 @@ class Publico(BasicNewsRecipe):
         (u'Euribor', u'http://estaticos.expansion.com/rss/mercadoseuribor.xml'),
         (u'Materias Primas', u'http://estaticos.expansion.com/rss/mercadosmateriasprimas.xml'),
         (u'Renta Fija', u'http://estaticos.expansion.com/rss/mercadosrentafija.xml'),

         (u'Portada: Mi Dinero', u'http://estaticos.expansion.com/rss/midinero.xml'),
         (u'Hipotecas', u'http://estaticos.expansion.com/rss/midinerohipotecas.xml'),
-        (u'Créditos', u'http://estaticos.expansion.com/rss/midinerocreditos.xml'),
+        (u'Cr\xe9ditos', u'http://estaticos.expansion.com/rss/midinerocreditos.xml'),
         (u'Pensiones', u'http://estaticos.expansion.com/rss/midineropensiones.xml'),
-        (u'Fondos de Inversión', u'http://estaticos.expansion.com/rss/midinerofondos.xml'),
+        (u'Fondos de Inversi\xf3n', u'http://estaticos.expansion.com/rss/midinerofondos.xml'),
         (u'Motor', u'http://estaticos.expansion.com/rss/midineromotor.xml'),

         (u'Portada: Empresas', u'http://estaticos.expansion.com/rss/empresas.xml'),
         (u'Banca', u'http://estaticos.expansion.com/rss/empresasbanca.xml'),
         (u'TMT', u'http://estaticos.expansion.com/rss/empresastmt.xml'),
-        (u'Energía', u'http://estaticos.expansion.com/rss/empresasenergia.xml'),
-        (u'Inmobiliario y Construcción', u'http://estaticos.expansion.com/rss/empresasinmobiliario.xml'),
+        (u'Energ\xeda', u'http://estaticos.expansion.com/rss/empresasenergia.xml'),
+        (u'Inmobiliario y Construcci\xf3n', u'http://estaticos.expansion.com/rss/empresasinmobiliario.xml'),
         (u'Transporte y Turismo', u'http://estaticos.expansion.com/rss/empresastransporte.xml'),
-        (u'Automoción e Industria', u'http://estaticos.expansion.com/rss/empresasauto-industria.xml'),
-        (u'Distribución', u'http://estaticos.expansion.com/rss/empresasdistribucion.xml'),
+        (u'Automoci\xf3n e Industria', u'http://estaticos.expansion.com/rss/empresasauto-industria.xml'),
+        (u'Distribuci\xf3n', u'http://estaticos.expansion.com/rss/empresasdistribucion.xml'),
         (u'Deporte y Negocio', u' http://estaticos.expansion.com/rss/empresasdeporte.xml'),
         (u'Mi Negocio', u'http://estaticos.expansion.com/rss/empresasminegocio.xml'),
         (u'Interiores', u'http://estaticos.expansion.com/rss/empresasinteriores.xml'),
         (u'Digitech', u'http://estaticos.expansion.com/rss/empresasdigitech.xml'),

-        (u'Portada: Economía y Política', u'http://estaticos.expansion.com/rss/economiapolitica.xml'),
-        (u'Política', u'http://estaticos.expansion.com/rss/economia.xml'),
+        (u'Portada: Econom\xeda y Pol\xedtica', u'http://estaticos.expansion.com/rss/economiapolitica.xml'),
+        (u'Pol\xedtica', u'http://estaticos.expansion.com/rss/economia.xml'),
         (u'Portada: Sociedad', u'http://estaticos.expansion.com/rss/entorno.xml'),

-        (u'Portada: Opinión', u'http://estaticos.expansion.com/rss/opinion.xml'),
+        (u'Portada: Opini\xf3n', u'http://estaticos.expansion.com/rss/opinion.xml'),
         (u'Llaves y editoriales', u'http://estaticos.expansion.com/rss/opinioneditorialyllaves.xml'),
         (u'Tribunas', u'http://estaticos.expansion.com/rss/opiniontribunas.xml'),

-        (u'Portada: Jurídico', u'http://estaticos.expansion.com/rss/juridico.xml'),
+        (u'Portada: Jur\xeddico', u'http://estaticos.expansion.com/rss/juridico.xml'),
         (u'Entrevistas', u'http://estaticos.expansion.com/rss/juridicoentrevistas.xml'),
-        (u'Opinión', u'http://estaticos.expansion.com/rss/juridicoopinion.xml'),
+        (u'Opini\xf3n', u'http://estaticos.expansion.com/rss/juridicoopinion.xml'),
         (u'Sentencias', u'http://estaticos.expansion.com/rss/juridicosentencias.xml'),

         (u'Mujer', u'http://estaticos.expansion.com/rss/mujer-empresa.xml'),
-        (u'Cataluña', u'http://estaticos.expansion.com/rss/catalunya.xml'),
-        (u'Función pública', u'http://estaticos.expansion.com/rss/funcion-publica.xml')
+        (u'Catalu\xf1a', u'http://estaticos.expansion.com/rss/catalunya.xml'),
+        (u'Funci\xf3n p\xfablica', u'http://estaticos.expansion.com/rss/funcion-publica.xml')
     ]

+    # Fetch the cover image
+    def get_cover_url(self):
+        cover = None
+        st = time.localtime()
+        year = str(st.tm_year)
+        month = "%.2d" % st.tm_mon
+        day = "%.2d" % st.tm_mday
+        # http://img5.kiosko.net/2011/11/14/es/expansion.750.jpg
+        cover = 'http://img5.kiosko.net/' + year + '/' + month + '/' + day + '/es/expansion.750.jpg'
+        br = BasicNewsRecipe.get_browser()
+        try:
+            br.open(cover)
+        except:
+            self.log("\nPortada no disponible")
+            cover = 'http://www.aproahp.org/enlaces/images/diario_expansion.gif'
+        return cover
+
+    # So that the interstitial ad does not pop up when fetching an article, and the
+    # article page itself is always returned, send a "t" parameter carrying the
+    # current "linux"/"epoch" time, making the web site believe the ad has just
+    # been viewed.
+    def print_version(self, url):
+        st = time.time()
+        segundos = str(int(st))
+        parametros = '.html?t=' + segundos
+        return url.replace('.html', parametros)
+
+    _processed_links = []
+
+    def get_article_url(self, article):
+
+        # Recover the original article url from the "feedsportal" one
+        link = article.get('link', None)
+        if link is None:
+            return article
+        if link.split('/')[-1]=="story01.htm":
+            link=link.split('/')[-2]
+            a=['0B','0C','0D','0E','0F','0G','0N' ,'0L0S','0A']
+            b=['.' ,'/' ,'?' ,'-' ,'=' ,'&' ,'.com','www.','0']
+            for i in range(0,len(a)):
+                link=link.replace(a[i],b[i])
+            link="http://"+link
+
+        # Drop articles duplicated in other feeds
+        if not (link in self._processed_links):
+            self._processed_links.append(link)
+        else:
+            link = None
+
+        return link
+
+    # A little css to improve the presentation of the articles
+    extra_css = '''
+        .entradilla {font-family:Arial,Helvetica,sans-serif; font-weight:bold; font-style:italic; font-size:16px;}
+        .fecha_publicacion,.autor {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:14px;}
+    '''
+
+    # To show the poster image of embedded videos
+    preprocess_regexps = [
+        (re.compile(r'var imagen', re.DOTALL|re.IGNORECASE), lambda match: '--></script><img src'),
+        (re.compile(r'.jpg";', re.DOTALL|re.IGNORECASE), lambda match: '.jpg">'),
+        (re.compile(r'var id_reproductor', re.DOTALL|re.IGNORECASE), lambda match: '<script language="Javascript" type="text/javascript"><!--'),
+    ]
@ -80,59 +80,11 @@ class Nin(BasicNewsRecipe):
|
|||||||
return self.PREFIX + item.img['src']
|
return self.PREFIX + item.img['src']
|
||||||
return cover_url
|
return cover_url
|
||||||
|
|
||||||
def parse_index(self):
|
feeds = [(u'NIN Online', u'http://www.nin.co.rs/misc/rss.php?feed=RSS2.0')]
|
||||||
articles = []
|
|
||||||
count = 0
|
def get_article_url(self, article):
|
||||||
soup = self.index_to_soup(self.INDEX)
|
url = BasicNewsRecipe.get_article_url(self, article)
|
||||||
for item in soup.findAll('a',attrs={'class':'lmeninavFont'}):
|
return url.replace('.co.yu', '.co.rs')
|
||||||
count = count +1
|
|
||||||
if self.test and count > 2:
|
|
||||||
return articles
|
|
||||||
section = self.tag_to_string(item)
|
|
||||||
feedlink = self.PREFIX + item['href']
|
|
||||||
feedpage = self.index_to_soup(feedlink)
|
|
||||||
self.report_progress(0, _('Fetching feed')+' %s...'%(section))
|
|
||||||
inarts = []
|
|
||||||
for art in feedpage.findAll('span',attrs={'class':'artTitle'}):
|
|
||||||
alink = art.parent
|
|
||||||
url = self.PREFIX + alink['href']
|
|
||||||
title = self.tag_to_string(art)
|
|
||||||
sparent = alink.parent
|
|
||||||
alink.extract()
|
|
||||||
description = self.tag_to_string(sparent)
|
|
||||||
date = strftime(self.timefmt)
|
|
||||||
inarts.append({
|
|
||||||
'title' :title
|
|
||||||
,'date' :date
|
|
||||||
,'url' :url
|
|
||||||
,'description':description
|
|
||||||
})
|
|
||||||
articles.append((section,inarts))
|
|
||||||
return articles
|
|
||||||
|
|
||||||
def index_to_soup(self, url_or_raw, raw=False):
|
|
||||||
if re.match(r'\w+://', url_or_raw):
|
|
||||||
open_func = getattr(self.browser, 'open_novisit', self.browser.open)
|
|
||||||
with closing(open_func(url_or_raw)) as f:
|
|
||||||
_raw = f.read()
|
|
||||||
if not _raw:
|
|
||||||
raise RuntimeError('Could not fetch index from %s'%url_or_raw)
|
|
||||||
else:
|
|
||||||
_raw = url_or_raw
|
|
||||||
if raw:
|
|
||||||
return _raw
|
|
||||||
if not isinstance(_raw, unicode) and self.encoding:
|
|
||||||
if callable(self.encoding):
|
|
||||||
_raw = self.encoding(_raw)
|
|
||||||
else:
|
|
||||||
_raw = _raw.decode(self.encoding, 'replace')
|
|
||||||
massage = list(BeautifulSoup.MARKUP_MASSAGE)
|
|
||||||
enc = 'cp1252' if callable(self.encoding) or self.encoding is None else self.encoding
|
|
||||||
massage.append((re.compile(r'&(\S+?);'), lambda match:
|
|
||||||
entity_to_unicode(match, encoding=enc)))
|
|
||||||
massage.append((re.compile(r'[\x00-\x08]+'), lambda match:
|
|
||||||
''))
|
|
||||||
return BeautifulSoup(_raw, markupMassage=massage)
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
|
@ -11,17 +11,16 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
|
|
||||||
class Salon_com(BasicNewsRecipe):
|
class Salon_com(BasicNewsRecipe):
|
||||||
title = 'Salon.com'
|
title = 'Salon.com'
|
||||||
__author__ = 'cix3'
|
__author__ = 'Kovid Goyal'
|
||||||
description = 'Salon.com - Breaking news, opinion, politics, entertainment, sports and culture.'
|
description = 'Salon.com - Breaking news, opinion, politics, entertainment, sports and culture.'
|
||||||
timefmt = ' [%b %d, %Y]'
|
timefmt = ' [%b %d, %Y]'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
|
auto_cleanup = True
|
||||||
remove_tags = [dict(name='div', attrs={'class':['ad_content', 'clearfix']}), dict(name='hr'), dict(name='img')]
|
auto_cleanup_keep = '//div[@class="art"]'
|
||||||
|
remove_empty_feeds = True
|
||||||
remove_tags_before = dict(name='h2')
|
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
('News & Politics', 'http://feeds.salon.com/salon/news'),
|
('News & Politics', 'http://feeds.salon.com/salon/news'),
|
||||||
@ -40,5 +39,5 @@ class Salon_com(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url.replace('/index.html', '/print.html')
|
return url + '/print/'
|
||||||
|
|
||||||
|
17
recipes/worldcrunch.recipe
Normal file
17
recipes/worldcrunch.recipe
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Worldcrunch(BasicNewsRecipe):
|
||||||
|
title = u'Worldcrunch'
|
||||||
|
__author__ = 'Krittika Goyal'
|
||||||
|
oldest_article = 1 #days
|
||||||
|
max_articles_per_feed = 25
|
||||||
|
use_embedded_content = False
|
||||||
|
|
||||||
|
no_stylesheets = True
|
||||||
|
auto_cleanup = True
|
||||||
|
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
('News',
|
||||||
|
'http://www.worldcrunch.com/feed'),
|
||||||
|
]
|
@ -201,15 +201,49 @@ save_template_title_series_sorting = 'library_order'
|
|||||||
|
|
||||||
#: Set the list of words considered to be "articles" for sort strings
|
#: Set the list of words considered to be "articles" for sort strings
|
||||||
# Set the list of words that are to be considered 'articles' when computing the
|
# Set the list of words that are to be considered 'articles' when computing the
|
||||||
# title sort strings. The list is a regular expression, with the articles
|
# title sort strings. The articles differ by language. By default, calibre uses
|
||||||
# separated by 'or' bars. Comparisons are case insensitive, and that cannot be
|
# a combination of articles from English and whatever language the calibre user
|
||||||
# changed. Changes to this tweak won't have an effect until the book is modified
|
# interface is set to. In addition, in some contexts where the book language is
|
||||||
# in some way. If you enter an invalid pattern, it is silently ignored.
|
# available, the language of the book is used. You can change the list of
|
||||||
# To disable use the expression: '^$'
|
# articles for a given language or add a new language by editing
|
||||||
# This expression is designed for articles that are followed by spaces. If you
|
# per_language_title_sort_articles. To tell calibre to use a language other
|
||||||
# also need to match articles that are followed by other characters, for example L'
|
# than the user interface language, set, default_language_for_title_sort. For
|
||||||
# in French, use: "^(A\s+|The\s+|An\s+|L')" instead.
|
# example, to use German, set it to 'deu'. A value of None means the user
|
||||||
# Default: '^(A|The|An)\s+'
|
# interface language is used. The setting title_sort_articles is ignored
|
||||||
|
# (present only for legacy reasons).
|
||||||
|
per_language_title_sort_articles = {
|
||||||
|
# English
|
||||||
|
'eng' : (r'A\s+', r'The\s+', r'An\s+'),
|
||||||
|
# Spanish
|
||||||
|
'spa' : (r'El\s+', r'La\s+', r'Lo\s+', r'Los\s+', r'Las\s+', r'Un\s+',
|
||||||
|
r'Una\s+', r'Unos\s+', r'Unas\s+'),
|
||||||
|
# French
|
||||||
|
'fra' : (r'Le\s+', r'La\s+', r"L'", r'Les\s+', r'Un\s+', r'Une\s+',
|
||||||
|
r'Des\s+'),
|
||||||
|
# Italian
|
||||||
|
'ita' : (r'Lo\s+', r'Il\s+', r"L'", r'La\s+', r'Gli\s+', r'I\s+',
|
||||||
|
r'Le\s+', ),
|
||||||
|
# Portuguese
|
||||||
|
'por' : (r'A\s+', r'O\s+', r'Os\s+', r'As\s+', r'Um\s+', r'Uns\s+',
|
||||||
|
r'Uma\s+', r'Umas\s+', ),
|
||||||
|
# Romanian
|
||||||
|
'ron' : (r'Un\s+', r'O\s+', r'Nişte\s+', ),
|
||||||
|
# German
|
||||||
|
'deu' : (r'Der\s+', r'Die\s+', r'Das\s+', r'Den\s+', r'Ein\s+',
|
||||||
|
r'Eine\s+', r'Einen\s+', ),
|
||||||
|
# Dutch
|
||||||
|
'nld' : (r'De\s+', r'Het\s+', r'Een\s+', ),
|
||||||
|
# Swedish
|
||||||
|
'swe' : (r'En\s+', r'Ett\s+', r'Det\s+', r'Den\s+', r'De\s+', ),
|
||||||
|
# Turkish
|
||||||
|
'tur' : (r'Bir\s+', ),
|
||||||
|
# Afrikaans
|
||||||
|
'afr' : (r"'n\s+", r'Die\s+', ),
|
||||||
|
# Greek
|
||||||
|
'ell' : (r'O\s+', r'I\s+', r'To\s+', r'Ta\s+', r'Tus\s+', r'Tis\s+',
|
||||||
|
r"'Enas\s+", r"'Mia\s+", r"'Ena\s+", r"'Enan\s+", ),
|
||||||
|
}
|
||||||
|
default_language_for_title_sort = None
|
||||||
title_sort_articles=r'^(A|The|An)\s+'
|
title_sort_articles=r'^(A|The|An)\s+'
|
||||||
|
|
||||||
#: Specify a folder calibre should connect to at startup
|
#: Specify a folder calibre should connect to at startup
|
||||||
|
@ -567,7 +567,7 @@ from calibre.devices.nuut2.driver import NUUT2
|
|||||||
from calibre.devices.iriver.driver import IRIVER_STORY
|
from calibre.devices.iriver.driver import IRIVER_STORY
|
||||||
from calibre.devices.binatone.driver import README
|
from calibre.devices.binatone.driver import README
|
||||||
from calibre.devices.hanvon.driver import (N516, EB511, ALEX, AZBOOKA, THEBOOK,
|
from calibre.devices.hanvon.driver import (N516, EB511, ALEX, AZBOOKA, THEBOOK,
|
||||||
LIBREAIR)
|
LIBREAIR, ODYSSEY)
|
||||||
from calibre.devices.edge.driver import EDGE
|
from calibre.devices.edge.driver import EDGE
|
||||||
from calibre.devices.teclast.driver import (TECLAST_K3, NEWSMY, IPAPYRUS,
|
from calibre.devices.teclast.driver import (TECLAST_K3, NEWSMY, IPAPYRUS,
|
||||||
SOVOS, PICO, SUNSTECH_EB700, ARCHOS7O, STASH, WEXLER)
|
SOVOS, PICO, SUNSTECH_EB700, ARCHOS7O, STASH, WEXLER)
|
||||||
@ -689,7 +689,7 @@ plugins += [
|
|||||||
IPAPYRUS,
|
IPAPYRUS,
|
||||||
EDGE,
|
EDGE,
|
||||||
SNE,
|
SNE,
|
||||||
ALEX,
|
ALEX, ODYSSEY,
|
||||||
PALMPRE,
|
PALMPRE,
|
||||||
KOBO,
|
KOBO,
|
||||||
AZBOOKA,
|
AZBOOKA,
|
||||||
|
@ -757,6 +757,7 @@ output_profiles = [OutputProfile, SonyReaderOutput, SonyReader300Output,
|
|||||||
iPadOutput, KoboReaderOutput, TabletOutput, SamsungGalaxy,
|
iPadOutput, KoboReaderOutput, TabletOutput, SamsungGalaxy,
|
||||||
SonyReaderLandscapeOutput, KindleDXOutput, IlliadOutput,
|
SonyReaderLandscapeOutput, KindleDXOutput, IlliadOutput,
|
||||||
IRexDR1000Output, IRexDR800Output, JetBook5Output, NookOutput,
|
IRexDR1000Output, IRexDR800Output, JetBook5Output, NookOutput,
|
||||||
BambookOutput, NookColorOutput, PocketBook900Output, GenericEink, GenericEinkLarge]
|
BambookOutput, NookColorOutput, PocketBook900Output, GenericEink,
|
||||||
|
GenericEinkLarge, KindleFireOutput]
|
||||||
|
|
||||||
output_profiles.sort(cmp=lambda x,y:cmp(x.name.lower(), y.name.lower()))
|
output_profiles.sort(cmp=lambda x,y:cmp(x.name.lower(), y.name.lower()))
|
||||||
|
@ -166,12 +166,12 @@ class ANDROID(USBMS):
|
|||||||
'MB525', 'ANDROID2.3', 'SGH-I997', 'GT-I5800_CARD', 'MB612',
|
'MB525', 'ANDROID2.3', 'SGH-I997', 'GT-I5800_CARD', 'MB612',
|
||||||
'GT-S5830_CARD', 'GT-S5570_CARD', 'MB870', 'MID7015A',
|
'GT-S5830_CARD', 'GT-S5570_CARD', 'MB870', 'MID7015A',
|
||||||
'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008', 'EMX51_BBG_ANDROI',
|
'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008', 'EMX51_BBG_ANDROI',
|
||||||
'UMS', '.K080', 'P990']
|
'UMS', '.K080', 'P990', 'LTE']
|
||||||
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
|
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
|
||||||
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
|
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
|
||||||
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
|
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
|
||||||
'__UMS_COMPOSITE', 'SGH-I997_CARD', 'MB870', 'ALPANDIGITAL',
|
'__UMS_COMPOSITE', 'SGH-I997_CARD', 'MB870', 'ALPANDIGITAL',
|
||||||
'ANDROID_MID', 'P990_SD_CARD', '.K080']
|
'ANDROID_MID', 'P990_SD_CARD', '.K080', 'LTE_CARD']
|
||||||
|
|
||||||
OSX_MAIN_MEM = 'Android Device Main Memory'
|
OSX_MAIN_MEM = 'Android Device Main Memory'
|
||||||
|
|
||||||
|
@ -217,6 +217,7 @@ class ITUNES(DriverBase):
|
|||||||
# 0x1297 iPhone 4
|
# 0x1297 iPhone 4
|
||||||
# 0x129a iPad
|
# 0x129a iPad
|
||||||
# 0x129f iPad2 (WiFi)
|
# 0x129f iPad2 (WiFi)
|
||||||
|
# 0x12a0 iPhone 4S
|
||||||
# 0x12a2 iPad2 (GSM)
|
# 0x12a2 iPad2 (GSM)
|
||||||
# 0x12a3 iPad2 (CDMA)
|
# 0x12a3 iPad2 (CDMA)
|
||||||
VENDOR_ID = [0x05ac]
|
VENDOR_ID = [0x05ac]
|
||||||
@ -1305,6 +1306,8 @@ class ITUNES(DriverBase):
|
|||||||
if DEBUG:
|
if DEBUG:
|
||||||
self.log.info(" ITUNES._add_new_copy()")
|
self.log.info(" ITUNES._add_new_copy()")
|
||||||
|
|
||||||
|
self._update_epub_metadata(fpath, metadata)
|
||||||
|
|
||||||
db_added = None
|
db_added = None
|
||||||
lb_added = None
|
lb_added = None
|
||||||
|
|
||||||
@ -1409,10 +1412,16 @@ class ITUNES(DriverBase):
|
|||||||
tmp_cover.write(cover_data)
|
tmp_cover.write(cover_data)
|
||||||
|
|
||||||
if lb_added:
|
if lb_added:
|
||||||
if lb_added.Artwork.Count:
|
try:
|
||||||
lb_added.Artwork.Item(1).SetArtworkFromFile(tc)
|
if lb_added.Artwork.Count:
|
||||||
else:
|
lb_added.Artwork.Item(1).SetArtworkFromFile(tc)
|
||||||
lb_added.AddArtworkFromFile(tc)
|
else:
|
||||||
|
lb_added.AddArtworkFromFile(tc)
|
||||||
|
except:
|
||||||
|
if DEBUG:
|
||||||
|
self.log.warning(" iTunes automation interface reported an error"
|
||||||
|
" when adding artwork to '%s' in the iTunes Library" % metadata.title)
|
||||||
|
pass
|
||||||
|
|
||||||
if db_added:
|
if db_added:
|
||||||
if db_added.Artwork.Count:
|
if db_added.Artwork.Count:
|
||||||
@ -2638,68 +2647,61 @@ class ITUNES(DriverBase):
|
|||||||
|
|
||||||
# Refresh epub metadata
|
# Refresh epub metadata
|
||||||
with open(fpath,'r+b') as zfo:
|
with open(fpath,'r+b') as zfo:
|
||||||
# Touch the OPF timestamp
|
if False:
|
||||||
try:
|
try:
|
||||||
zf_opf = ZipFile(fpath,'r')
|
zf_opf = ZipFile(fpath,'r')
|
||||||
fnames = zf_opf.namelist()
|
fnames = zf_opf.namelist()
|
||||||
opf = [x for x in fnames if '.opf' in x][0]
|
opf = [x for x in fnames if '.opf' in x][0]
|
||||||
except:
|
except:
|
||||||
raise UserFeedback("'%s' is not a valid EPUB" % metadata.title,
|
raise UserFeedback("'%s' is not a valid EPUB" % metadata.title,
|
||||||
None,
|
None,
|
||||||
level=UserFeedback.WARN)
|
level=UserFeedback.WARN)
|
||||||
|
|
||||||
|
#Touch the OPF timestamp
|
||||||
|
opf_tree = etree.fromstring(zf_opf.read(opf))
|
||||||
|
md_els = opf_tree.xpath('.//*[local-name()="metadata"]')
|
||||||
|
if md_els:
|
||||||
|
ts = md_els[0].find('.//*[@name="calibre:timestamp"]')
|
||||||
|
if ts is not None:
|
||||||
|
timestamp = ts.get('content')
|
||||||
|
old_ts = parse_date(timestamp)
|
||||||
|
metadata.timestamp = datetime.datetime(old_ts.year, old_ts.month, old_ts.day, old_ts.hour,
|
||||||
|
old_ts.minute, old_ts.second, old_ts.microsecond+1, old_ts.tzinfo)
|
||||||
|
if DEBUG:
|
||||||
|
self.log.info(" existing timestamp: %s" % metadata.timestamp)
|
||||||
|
else:
|
||||||
|
metadata.timestamp = now()
|
||||||
|
if DEBUG:
|
||||||
|
self.log.info(" add timestamp: %s" % metadata.timestamp)
|
||||||
|
|
||||||
opf_tree = etree.fromstring(zf_opf.read(opf))
|
|
||||||
md_els = opf_tree.xpath('.//*[local-name()="metadata"]')
|
|
||||||
if md_els:
|
|
||||||
ts = md_els[0].find('.//*[@name="calibre:timestamp"]')
|
|
||||||
if ts is not None:
|
|
||||||
timestamp = ts.get('content')
|
|
||||||
old_ts = parse_date(timestamp)
|
|
||||||
metadata.timestamp = datetime.datetime(old_ts.year, old_ts.month, old_ts.day, old_ts.hour,
|
|
||||||
old_ts.minute, old_ts.second, old_ts.microsecond+1, old_ts.tzinfo)
|
|
||||||
if DEBUG:
|
|
||||||
self.log.info(" existing timestamp: %s" % metadata.timestamp)
|
|
||||||
else:
|
else:
|
||||||
metadata.timestamp = now()
|
metadata.timestamp = now()
|
||||||
if DEBUG:
|
if DEBUG:
|
||||||
|
self.log.warning(" missing <metadata> block in OPF file")
|
||||||
self.log.info(" add timestamp: %s" % metadata.timestamp)
|
self.log.info(" add timestamp: %s" % metadata.timestamp)
|
||||||
else:
|
|
||||||
metadata.timestamp = now()
|
|
||||||
if DEBUG:
|
|
||||||
self.log.warning(" missing <metadata> block in OPF file")
|
|
||||||
self.log.info(" add timestamp: %s" % metadata.timestamp)
|
|
||||||
# Force the language declaration for iBooks 1.1
|
|
||||||
#metadata.language = get_lang().replace('_', '-')
|
|
||||||
|
|
||||||
# Updates from metadata plugboard (ignoring publisher)
|
zf_opf.close()
|
||||||
metadata.language = metadata_x.language
|
|
||||||
|
|
||||||
if DEBUG:
|
|
||||||
if metadata.language != metadata_x.language:
|
|
||||||
self.log.info(" rewriting language: <dc:language>%s</dc:language>" % metadata.language)
|
|
||||||
|
|
||||||
zf_opf.close()
|
|
||||||
|
|
||||||
# If 'News' in tags, tweak the title/author for friendlier display in iBooks
|
# If 'News' in tags, tweak the title/author for friendlier display in iBooks
|
||||||
if _('News') in metadata.tags or \
|
if _('News') in metadata_x.tags or \
|
||||||
_('Catalog') in metadata.tags:
|
_('Catalog') in metadata_x.tags:
|
||||||
if metadata.title.find('[') > 0:
|
if metadata_x.title.find('[') > 0:
|
||||||
metadata.title = metadata.title[:metadata.title.find('[')-1]
|
metadata_x.title = metadata_x.title[:metadata_x.title.find('[')-1]
|
||||||
date_as_author = '%s, %s %s, %s' % (strftime('%A'), strftime('%B'), strftime('%d').lstrip('0'), strftime('%Y'))
|
date_as_author = '%s, %s %s, %s' % (strftime('%A'), strftime('%B'), strftime('%d').lstrip('0'), strftime('%Y'))
|
||||||
metadata.author = metadata.authors = [date_as_author]
|
metadata_x.author = metadata_x.authors = [date_as_author]
|
||||||
sort_author = re.sub('^\s*A\s+|^\s*The\s+|^\s*An\s+', '', metadata.title).rstrip()
|
sort_author = re.sub('^\s*A\s+|^\s*The\s+|^\s*An\s+', '', metadata_x.title).rstrip()
|
||||||
metadata.author_sort = '%s %s' % (sort_author, strftime('%Y-%m-%d'))
|
metadata_x.author_sort = '%s %s' % (sort_author, strftime('%Y-%m-%d'))
|
||||||
|
|
||||||
# Remove any non-alpha category tags
|
# Remove any non-alpha category tags
|
||||||
for tag in metadata.tags:
|
for tag in metadata_x.tags:
|
||||||
if not self._is_alpha(tag[0]):
|
if not self._is_alpha(tag[0]):
|
||||||
metadata.tags.remove(tag)
|
metadata_x.tags.remove(tag)
|
||||||
|
|
||||||
# If windows & series, nuke tags so series used as Category during _update_iTunes_metadata()
|
# If windows & series, nuke tags so series used as Category during _update_iTunes_metadata()
|
||||||
if iswindows and metadata.series:
|
if iswindows and metadata_x.series:
|
||||||
metadata.tags = None
|
metadata_x.tags = None
|
||||||
|
|
||||||
set_metadata(zfo, metadata, update_timestamp=True)
|
set_metadata(zfo, metadata_x, apply_null=True, update_timestamp=True)
|
||||||
|
|
||||||
def _update_device(self, msg='', wait=True):
|
def _update_device(self, msg='', wait=True):
|
||||||
'''
|
'''
|
||||||
@ -2771,6 +2773,8 @@ class ITUNES(DriverBase):
|
|||||||
lb_added.sort_name.set(metadata_x.title_sort)
|
lb_added.sort_name.set(metadata_x.title_sort)
|
||||||
|
|
||||||
if db_added:
|
if db_added:
|
||||||
|
self.log.warning(" waiting for db_added to become writeable ")
|
||||||
|
time.sleep(1.0)
|
||||||
db_added.name.set(metadata_x.title)
|
db_added.name.set(metadata_x.title)
|
||||||
db_added.album.set(metadata_x.title)
|
db_added.album.set(metadata_x.title)
|
||||||
db_added.artist.set(authors_to_string(metadata_x.authors))
|
db_added.artist.set(authors_to_string(metadata_x.authors))
|
||||||
@ -2826,6 +2830,8 @@ class ITUNES(DriverBase):
|
|||||||
break
|
break
|
||||||
|
|
||||||
if db_added:
|
if db_added:
|
||||||
|
self.log.warning(" waiting for db_added to become writeable ")
|
||||||
|
time.sleep(1.0)
|
||||||
# If no title_sort plugboard tweak, create sort_name from series/index
|
# If no title_sort plugboard tweak, create sort_name from series/index
|
||||||
if metadata.title_sort == metadata_x.title_sort:
|
if metadata.title_sort == metadata_x.title_sort:
|
||||||
db_added.sort_name.set("%s %s" % (self.title_sorter(metadata_x.series), series_index))
|
db_added.sort_name.set("%s %s" % (self.title_sorter(metadata_x.series), series_index))
|
||||||
@ -2866,6 +2872,8 @@ class ITUNES(DriverBase):
|
|||||||
lb_added.SortName = metadata_x.title_sort
|
lb_added.SortName = metadata_x.title_sort
|
||||||
|
|
||||||
if db_added:
|
if db_added:
|
||||||
|
self.log.warning(" waiting for db_added to become writeable ")
|
||||||
|
time.sleep(1.0)
|
||||||
db_added.Name = metadata_x.title
|
db_added.Name = metadata_x.title
|
||||||
db_added.Album = metadata_x.title
|
db_added.Album = metadata_x.title
|
||||||
db_added.Artist = authors_to_string(metadata_x.authors)
|
db_added.Artist = authors_to_string(metadata_x.authors)
|
||||||
|
@ -164,4 +164,21 @@ class EB511(USBMS):
|
|||||||
|
|
||||||
OSX_MAIN_MEM_VOL_PAT = re.compile(r'/eReader')
|
OSX_MAIN_MEM_VOL_PAT = re.compile(r'/eReader')
|
||||||
|
|
||||||
|
class ODYSSEY(N516):
|
||||||
|
name = 'Cybook Odyssey driver'
|
||||||
|
gui_name = 'Odyssey'
|
||||||
|
description = _('Communicate with the Cybook Odyssey eBook reader.')
|
||||||
|
|
||||||
|
BCD = [0x316]
|
||||||
|
VENDOR_NAME = 'LINUX'
|
||||||
|
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'FILE-STOR_GADGET'
|
||||||
|
|
||||||
|
FORMATS = ['epub', 'fb2', 'html', 'pdf', 'txt']
|
||||||
|
|
||||||
|
EBOOK_DIR_MAIN = 'calibre'
|
||||||
|
|
||||||
|
def get_main_ebook_dir(self, for_upload=False):
|
||||||
|
if for_upload:
|
||||||
|
return self.EBOOK_DIR_MAIN
|
||||||
|
return ''
|
||||||
|
|
||||||
|
@ -95,18 +95,33 @@ def author_to_author_sort(author, method=None):
|
|||||||
def authors_to_sort_string(authors):
|
def authors_to_sort_string(authors):
|
||||||
return ' & '.join(map(author_to_author_sort, authors))
|
return ' & '.join(map(author_to_author_sort, authors))
|
||||||
|
|
||||||
try:
|
_title_pats = {}
|
||||||
_title_pat = re.compile(tweaks.get('title_sort_articles',
|
def get_title_sort_pat(lang=None):
|
||||||
r'^(A|The|An)\s+'), re.IGNORECASE)
|
ans = _title_pats.get(lang, None)
|
||||||
except:
|
if ans is not None:
|
||||||
print 'Error in title sort pattern'
|
return ans
|
||||||
import traceback
|
q = lang
|
||||||
traceback.print_exc()
|
from calibre.utils.localization import canonicalize_lang, get_lang
|
||||||
_title_pat = re.compile('^(A|The|An)\s+', re.IGNORECASE)
|
if lang is None:
|
||||||
|
q = tweaks['default_language_for_title_sort']
|
||||||
|
if q is None:
|
||||||
|
q = get_lang()
|
||||||
|
q = canonicalize_lang(q) if q else q
|
||||||
|
data = tweaks['per_language_title_sort_articles']
|
||||||
|
ans = data.get(q, None)
|
||||||
|
if ans is None:
|
||||||
|
ans = data['eng']
|
||||||
|
ans = frozenset(ans + data['eng'])
|
||||||
|
ans = '|'.join(ans)
|
||||||
|
ans = '^(%s)'%ans
|
||||||
|
ans = re.compile(ans, re.IGNORECASE)
|
||||||
|
_title_pats[lang] = ans
|
||||||
|
return ans
|
||||||
|
|
||||||
_ignore_starts = u'\'"'+u''.join(unichr(x) for x in range(0x2018, 0x201e)+[0x2032, 0x2033])
|
_ignore_starts = u'\'"'+u''.join(unichr(x) for x in
|
||||||
|
range(0x2018, 0x201e)+[0x2032, 0x2033])
|
||||||
|
|
||||||
def title_sort(title, order=None):
|
def title_sort(title, order=None, lang=None):
|
||||||
if order is None:
|
if order is None:
|
||||||
order = tweaks['title_series_sorting']
|
order = tweaks['title_series_sorting']
|
||||||
title = title.strip()
|
title = title.strip()
|
||||||
@ -114,7 +129,7 @@ def title_sort(title, order=None):
|
|||||||
return title
|
return title
|
||||||
if title and title[0] in _ignore_starts:
|
if title and title[0] in _ignore_starts:
|
||||||
title = title[1:]
|
title = title[1:]
|
||||||
match = _title_pat.search(title)
|
match = get_title_sort_pat(lang).search(title)
|
||||||
if match:
|
if match:
|
||||||
try:
|
try:
|
||||||
prep = match.group(1)
|
prep = match.group(1)
|
||||||
|
@ -15,7 +15,7 @@ from calibre.customize import Plugin
|
|||||||
from calibre.utils.logging import ThreadSafeLog, FileStream
|
from calibre.utils.logging import ThreadSafeLog, FileStream
|
||||||
from calibre.utils.config import JSONConfig
|
from calibre.utils.config import JSONConfig
|
||||||
from calibre.utils.titlecase import titlecase
|
from calibre.utils.titlecase import titlecase
|
||||||
from calibre.utils.icu import capitalize, lower
|
from calibre.utils.icu import capitalize, lower, upper
|
||||||
from calibre.ebooks.metadata import check_isbn
|
from calibre.ebooks.metadata import check_isbn
|
||||||
|
|
||||||
msprefs = JSONConfig('metadata_sources/global.json')
|
msprefs = JSONConfig('metadata_sources/global.json')
|
||||||
@ -121,7 +121,12 @@ def cap_author_token(token):
|
|||||||
# Normalize tokens of the form J.K. to J. K.
|
# Normalize tokens of the form J.K. to J. K.
|
||||||
parts = token.split('.')
|
parts = token.split('.')
|
||||||
return '. '.join(map(capitalize, parts)).strip()
|
return '. '.join(map(capitalize, parts)).strip()
|
||||||
return capitalize(token)
|
ans = capitalize(token)
|
||||||
|
for x in ('-', "'"):
|
||||||
|
idx = ans.find(x)
|
||||||
|
if idx > -1 and len(ans) > idx+2:
|
||||||
|
ans = ans[:idx+1] + upper(ans[idx+1]) + ans[idx+2:]
|
||||||
|
return ans
|
||||||
|
|
||||||
def fixauthors(authors):
|
def fixauthors(authors):
|
||||||
if not authors:
|
if not authors:
|
||||||
|
@ -11,7 +11,7 @@ import datetime
|
|||||||
from urllib import quote_plus
|
from urllib import quote_plus
|
||||||
from Queue import Queue, Empty
|
from Queue import Queue, Empty
|
||||||
from lxml import etree, html
|
from lxml import etree, html
|
||||||
from calibre import prints, as_unicode
|
from calibre import as_unicode
|
||||||
|
|
||||||
from calibre.ebooks.chardet import xml_to_unicode
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
|
|
||||||
@ -54,7 +54,7 @@ class Ozon(Source):
|
|||||||
def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
|
def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
|
||||||
# div_book -> search only books, ebooks and audio books
|
# div_book -> search only books, ebooks and audio books
|
||||||
search_url = self.ozon_url + '/webservice/webservice.asmx/SearchWebService?searchContext=div_book&searchText='
|
search_url = self.ozon_url + '/webservice/webservice.asmx/SearchWebService?searchContext=div_book&searchText='
|
||||||
|
|
||||||
# for ozon.ru search we have to format ISBN with '-'
|
# for ozon.ru search we have to format ISBN with '-'
|
||||||
isbn = _format_isbn(log, identifiers.get('isbn', None))
|
isbn = _format_isbn(log, identifiers.get('isbn', None))
|
||||||
# TODO: format isbn!
|
# TODO: format isbn!
|
||||||
@ -79,7 +79,7 @@ class Ozon(Source):
|
|||||||
return search_url
|
return search_url
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
def identify(self, log, result_queue, abort, title=None, authors=None,
|
def identify(self, log, result_queue, abort, title=None, authors=None,
|
||||||
identifiers={}, timeout=30): # {{{
|
identifiers={}, timeout=30): # {{{
|
||||||
if not self.is_configured():
|
if not self.is_configured():
|
||||||
return
|
return
|
||||||
@ -112,13 +112,13 @@ class Ozon(Source):
|
|||||||
def get_metadata(self, log, entries, title, authors, identifiers): # {{{
|
def get_metadata(self, log, entries, title, authors, identifiers): # {{{
|
||||||
# some book titles have extra characters like this
|
# some book titles have extra characters like this
|
||||||
# TODO: make a twick
|
# TODO: make a twick
|
||||||
reRemoveFromTitle = None
|
reRemoveFromTitle = None
|
||||||
#reRemoveFromTitle = re.compile(r'[?!:.,;+-/&%"\'=]')
|
#reRemoveFromTitle = re.compile(r'[?!:.,;+-/&%"\'=]')
|
||||||
|
|
||||||
title = unicode(title).upper() if title else ''
|
title = unicode(title).upper() if title else ''
|
||||||
if reRemoveFromTitle:
|
if reRemoveFromTitle:
|
||||||
title = reRemoveFromTitle.sub('', title)
|
title = reRemoveFromTitle.sub('', title)
|
||||||
authors = map(_normalizeAuthorNameWithInitials,
|
authors = map(_normalizeAuthorNameWithInitials,
|
||||||
map(unicode.upper, map(unicode, authors))) if authors else None
|
map(unicode.upper, map(unicode, authors))) if authors else None
|
||||||
ozon_id = identifiers.get('ozon', None)
|
ozon_id = identifiers.get('ozon', None)
|
||||||
|
|
||||||
@ -320,7 +320,7 @@ class Ozon(Source):
|
|||||||
displ_lang = lng_splt[0].strip()
|
displ_lang = lng_splt[0].strip()
|
||||||
metadata.language = _translageLanguageToCode(displ_lang)
|
metadata.language = _translageLanguageToCode(displ_lang)
|
||||||
#log.debug(u'language: %s'%displ_lang)
|
#log.debug(u'language: %s'%displ_lang)
|
||||||
|
|
||||||
# can be set before from xml search responce
|
# can be set before from xml search responce
|
||||||
if not metadata.pubdate:
|
if not metadata.pubdate:
|
||||||
xpt = u'normalize-space(//div[@class="product-misc"]//text()[contains(., "г.")])'
|
xpt = u'normalize-space(//div[@class="product-misc"]//text()[contains(., "г.")])'
|
||||||
@ -434,13 +434,13 @@ def _translageLanguageToCode(displayLang): # {{{
|
|||||||
# [В.П. Колесников | Колесников В.П.]-> В. П. BКолесников
|
# [В.П. Колесников | Колесников В.П.]-> В. П. BКолесников
|
||||||
def _normalizeAuthorNameWithInitials(name): # {{{
|
def _normalizeAuthorNameWithInitials(name): # {{{
|
||||||
res = name
|
res = name
|
||||||
if name:
|
if name:
|
||||||
re1 = u'^(?P<lname>\S+)\s+(?P<fname>[^\d\W]\.)(?:\s*(?P<mname>[^\d\W]\.))?$'
|
re1 = u'^(?P<lname>\S+)\s+(?P<fname>[^\d\W]\.)(?:\s*(?P<mname>[^\d\W]\.))?$'
|
||||||
re2 = u'^(?P<fname>[^\d\W]\.)(?:\s*(?P<mname>[^\d\W]\.))?\s+(?P<lname>\S+)$'
|
re2 = u'^(?P<fname>[^\d\W]\.)(?:\s*(?P<mname>[^\d\W]\.))?\s+(?P<lname>\S+)$'
|
||||||
matcher = re.match(re1, unicode(name), re.UNICODE)
|
matcher = re.match(re1, unicode(name), re.UNICODE)
|
||||||
if not matcher:
|
if not matcher:
|
||||||
matcher = re.match(re2, unicode(name), re.UNICODE)
|
matcher = re.match(re2, unicode(name), re.UNICODE)
|
||||||
|
|
||||||
if matcher:
|
if matcher:
|
||||||
d = matcher.groupdict()
|
d = matcher.groupdict()
|
||||||
res = ' '.join(x for x in (d['fname'], d['mname'], d['lname']) if x)
|
res = ' '.join(x for x in (d['fname'], d['mname'], d['lname']) if x)
|
||||||
|
@ -302,7 +302,18 @@ class MobiWriter(object):
|
|||||||
|
|
||||||
def generate_record0(self): # MOBI header {{{
|
def generate_record0(self): # MOBI header {{{
|
||||||
metadata = self.oeb.metadata
|
metadata = self.oeb.metadata
|
||||||
exth = self.build_exth()
|
bt = 0x002
|
||||||
|
if self.primary_index_record_idx is not None:
|
||||||
|
if False and self.indexer.is_flat_periodical:
|
||||||
|
# Disabled as setting this to 0x102 causes the Kindle to not
|
||||||
|
# auto archive the issues
|
||||||
|
bt = 0x102
|
||||||
|
elif self.indexer.is_periodical:
|
||||||
|
# If you change this, remember to change the cdetype in the EXTH
|
||||||
|
# header as well
|
||||||
|
bt = 0x103 if self.indexer.is_flat_periodical else 0x101
|
||||||
|
|
||||||
|
exth = self.build_exth(bt)
|
||||||
first_image_record = None
|
first_image_record = None
|
||||||
if self.image_records:
|
if self.image_records:
|
||||||
first_image_record = len(self.records)
|
first_image_record = len(self.records)
|
||||||
@ -351,17 +362,6 @@ class MobiWriter(object):
|
|||||||
# 0x10 - 0x13 : UID
|
# 0x10 - 0x13 : UID
|
||||||
# 0x14 - 0x17 : Generator version
|
# 0x14 - 0x17 : Generator version
|
||||||
|
|
||||||
bt = 0x002
|
|
||||||
if self.primary_index_record_idx is not None:
|
|
||||||
if False and self.indexer.is_flat_periodical:
|
|
||||||
# Disabled as setting this to 0x102 causes the Kindle to not
|
|
||||||
# auto archive the issues
|
|
||||||
bt = 0x102
|
|
||||||
elif self.indexer.is_periodical:
|
|
||||||
# If you change this, remember to change the cdetype in the EXTH
|
|
||||||
# header as well
|
|
||||||
bt = {'newspaper':0x101}.get(self.publication_type, 0x103)
|
|
||||||
|
|
||||||
record0.write(pack(b'>IIIII',
|
record0.write(pack(b'>IIIII',
|
||||||
0xe8, bt, 65001, uid, 6))
|
0xe8, bt, 65001, uid, 6))
|
||||||
|
|
||||||
@ -479,7 +479,7 @@ class MobiWriter(object):
|
|||||||
self.records[0] = align_block(record0)
|
self.records[0] = align_block(record0)
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
def build_exth(self): # EXTH Header {{{
|
def build_exth(self, mobi_doctype): # EXTH Header {{{
|
||||||
oeb = self.oeb
|
oeb = self.oeb
|
||||||
exth = StringIO()
|
exth = StringIO()
|
||||||
nrecs = 0
|
nrecs = 0
|
||||||
@ -535,16 +535,17 @@ class MobiWriter(object):
|
|||||||
nrecs += 1
|
nrecs += 1
|
||||||
|
|
||||||
# Write cdetype
|
# Write cdetype
|
||||||
if not self.is_periodical and not self.opts.share_not_sync:
|
if not self.is_periodical:
|
||||||
exth.write(pack(b'>II', 501, 12))
|
if not self.opts.share_not_sync:
|
||||||
exth.write(b'EBOK')
|
exth.write(pack(b'>II', 501, 12))
|
||||||
nrecs += 1
|
exth.write(b'EBOK')
|
||||||
|
nrecs += 1
|
||||||
else:
|
else:
|
||||||
# Should be b'NWPR' for doc type of 0x101 and b'MAGZ' for doctype
|
ids = {0x101:b'NWPR', 0x103:b'MAGZ'}.get(mobi_doctype, None)
|
||||||
# of 0x103 but the old writer didn't write them, and I dont know
|
if ids:
|
||||||
# what it should be for type 0x102 (b'BLOG'?) so write nothing
|
exth.write(pack(b'>II', 501, 12))
|
||||||
# instead
|
exth.write(ids)
|
||||||
pass
|
nrecs += 1
|
||||||
|
|
||||||
# Add a publication date entry
|
# Add a publication date entry
|
||||||
if oeb.metadata['date']:
|
if oeb.metadata['date']:
|
||||||
|
@ -965,16 +965,22 @@ class Manifest(object):
|
|||||||
try:
|
try:
|
||||||
data = etree.fromstring(data, parser=parser)
|
data = etree.fromstring(data, parser=parser)
|
||||||
except etree.XMLSyntaxError:
|
except etree.XMLSyntaxError:
|
||||||
self.oeb.logger.warn('Stripping comments and meta tags from %s'%
|
self.oeb.logger.warn('Stripping comments from %s'%
|
||||||
self.href)
|
self.href)
|
||||||
data = re.compile(r'<!--.*?-->', re.DOTALL).sub('',
|
data = re.compile(r'<!--.*?-->', re.DOTALL).sub('',
|
||||||
data)
|
data)
|
||||||
data = re.sub(r'<meta\s+[^>]+?>', '', data)
|
|
||||||
data = data.replace(
|
data = data.replace(
|
||||||
"<?xml version='1.0' encoding='utf-8'?><o:p></o:p>",
|
"<?xml version='1.0' encoding='utf-8'?><o:p></o:p>",
|
||||||
'')
|
'')
|
||||||
data = data.replace("<?xml version='1.0' encoding='utf-8'??>", '')
|
data = data.replace("<?xml version='1.0' encoding='utf-8'??>", '')
|
||||||
data = etree.fromstring(data, parser=RECOVER_PARSER)
|
try:
|
||||||
|
data = etree.fromstring(data,
|
||||||
|
parser=RECOVER_PARSER)
|
||||||
|
except etree.XMLSyntaxError:
|
||||||
|
self.oeb.logger.warn('Stripping meta tags from %s'%
|
||||||
|
self.href)
|
||||||
|
data = re.sub(r'<meta\s+[^>]+?>', '', data)
|
||||||
|
data = etree.fromstring(data, parser=RECOVER_PARSER)
|
||||||
elif namespace(data.tag) != XHTML_NS:
|
elif namespace(data.tag) != XHTML_NS:
|
||||||
# OEB_DOC_NS, but possibly others
|
# OEB_DOC_NS, but possibly others
|
||||||
ns = namespace(data.tag)
|
ns = namespace(data.tag)
|
||||||
|
@ -11,9 +11,8 @@ import operator
|
|||||||
import math
|
import math
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
from calibre.ebooks.oeb.base import XHTML, XHTML_NS
|
from calibre.ebooks.oeb.base import (XHTML, XHTML_NS, CSS_MIME, OEB_STYLES,
|
||||||
from calibre.ebooks.oeb.base import CSS_MIME, OEB_STYLES
|
namespace, barename, XPath)
|
||||||
from calibre.ebooks.oeb.base import namespace, barename
|
|
||||||
from calibre.ebooks.oeb.stylizer import Stylizer
|
from calibre.ebooks.oeb.stylizer import Stylizer
|
||||||
|
|
||||||
COLLAPSE = re.compile(r'[ \t\r\n\v]+')
|
COLLAPSE = re.compile(r'[ \t\r\n\v]+')
|
||||||
@ -232,7 +231,10 @@ class CSSFlattener(object):
|
|||||||
cssdict['text-align'] = val
|
cssdict['text-align'] = val
|
||||||
del node.attrib['align']
|
del node.attrib['align']
|
||||||
if node.tag == XHTML('font'):
|
if node.tag == XHTML('font'):
|
||||||
node.tag = XHTML('span')
|
tags = ['descendant::h:%s'%x for x in ('p', 'div', 'table', 'h1',
|
||||||
|
'h2', 'h3', 'h4', 'h5', 'h6', 'ol', 'ul', 'dl', 'blockquote')]
|
||||||
|
tag = 'div' if XPath('|'.join(tags))(node) else 'span'
|
||||||
|
node.tag = XHTML(tag)
|
||||||
if 'size' in node.attrib:
|
if 'size' in node.attrib:
|
||||||
def force_int(raw):
|
def force_int(raw):
|
||||||
return int(re.search(r'([0-9+-]+)', raw).group(1))
|
return int(re.search(r'([0-9+-]+)', raw).group(1))
|
||||||
|
@ -17,7 +17,7 @@ from calibre.ebooks.metadata import fmt_sidx, authors_to_string, string_to_autho
|
|||||||
from calibre.ebooks.metadata.book.base import SafeFormat
|
from calibre.ebooks.metadata.book.base import SafeFormat
|
||||||
from calibre.ptempfile import PersistentTemporaryFile
|
from calibre.ptempfile import PersistentTemporaryFile
|
||||||
from calibre.utils.config import tweaks, prefs
|
from calibre.utils.config import tweaks, prefs
|
||||||
from calibre.utils.date import dt_factory, qt_to_dt
|
from calibre.utils.date import dt_factory, qt_to_dt, as_local_time
|
||||||
from calibre.utils.icu import sort_key
|
from calibre.utils.icu import sort_key
|
||||||
from calibre.utils.search_query_parser import SearchQueryParser
|
from calibre.utils.search_query_parser import SearchQueryParser
|
||||||
from calibre.library.caches import (_match, CONTAINS_MATCH, EQUALS_MATCH,
|
from calibre.library.caches import (_match, CONTAINS_MATCH, EQUALS_MATCH,
|
||||||
@ -580,7 +580,7 @@ class BooksModel(QAbstractTableModel): # {{{
|
|||||||
def datetime_type(r, idx=-1):
|
def datetime_type(r, idx=-1):
|
||||||
val = self.db.data[r][idx]
|
val = self.db.data[r][idx]
|
||||||
if val is not None:
|
if val is not None:
|
||||||
return QVariant(QDateTime(val))
|
return QVariant(QDateTime(as_local_time(val)))
|
||||||
else:
|
else:
|
||||||
return QVariant(UNDEFINED_QDATETIME)
|
return QVariant(UNDEFINED_QDATETIME)
|
||||||
|
|
||||||
|
@ -138,9 +138,10 @@ class TitleSortEdit(TitleEdit):
|
|||||||
' For example, The Exorcist might be sorted as Exorcist, The.')
|
' For example, The Exorcist might be sorted as Exorcist, The.')
|
||||||
LABEL = _('Title &sort:')
|
LABEL = _('Title &sort:')
|
||||||
|
|
||||||
def __init__(self, parent, title_edit, autogen_button):
|
def __init__(self, parent, title_edit, autogen_button, languages_edit):
|
||||||
TitleEdit.__init__(self, parent)
|
TitleEdit.__init__(self, parent)
|
||||||
self.title_edit = title_edit
|
self.title_edit = title_edit
|
||||||
|
self.languages_edit = languages_edit
|
||||||
|
|
||||||
base = self.TOOLTIP
|
base = self.TOOLTIP
|
||||||
ok_tooltip = '<p>' + textwrap.fill(base+'<br><br>'+
|
ok_tooltip = '<p>' + textwrap.fill(base+'<br><br>'+
|
||||||
@ -157,10 +158,20 @@ class TitleSortEdit(TitleEdit):
|
|||||||
|
|
||||||
self.autogen_button = autogen_button
|
self.autogen_button = autogen_button
|
||||||
autogen_button.clicked.connect(self.auto_generate)
|
autogen_button.clicked.connect(self.auto_generate)
|
||||||
|
languages_edit.editTextChanged.connect(self.update_state)
|
||||||
|
languages_edit.currentIndexChanged.connect(self.update_state)
|
||||||
self.update_state()
|
self.update_state()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def book_lang(self):
|
||||||
|
try:
|
||||||
|
book_lang = self.languages_edit.lang_codes[0]
|
||||||
|
except:
|
||||||
|
book_lang = None
|
||||||
|
return book_lang
|
||||||
|
|
||||||
def update_state(self, *args):
|
def update_state(self, *args):
|
||||||
ts = title_sort(self.title_edit.current_val)
|
ts = title_sort(self.title_edit.current_val, lang=self.book_lang)
|
||||||
normal = ts == self.current_val
|
normal = ts == self.current_val
|
||||||
if normal:
|
if normal:
|
||||||
col = 'rgb(0, 255, 0, 20%)'
|
col = 'rgb(0, 255, 0, 20%)'
|
||||||
@ -173,7 +184,8 @@ class TitleSortEdit(TitleEdit):
|
|||||||
self.setWhatsThis(tt)
|
self.setWhatsThis(tt)
|
||||||
|
|
||||||
def auto_generate(self, *args):
|
def auto_generate(self, *args):
|
||||||
self.current_val = title_sort(self.title_edit.current_val)
|
self.current_val = title_sort(self.title_edit.current_val,
|
||||||
|
lang=self.book_lang)
|
||||||
|
|
||||||
def break_cycles(self):
|
def break_cycles(self):
|
||||||
try:
|
try:
|
||||||
|
@ -109,6 +109,9 @@ class MetadataSingleDialogBase(ResizableDialog):
|
|||||||
def create_basic_metadata_widgets(self): # {{{
|
def create_basic_metadata_widgets(self): # {{{
|
||||||
self.basic_metadata_widgets = []
|
self.basic_metadata_widgets = []
|
||||||
|
|
||||||
|
self.languages = LanguagesEdit(self)
|
||||||
|
self.basic_metadata_widgets.append(self.languages)
|
||||||
|
|
||||||
self.title = TitleEdit(self)
|
self.title = TitleEdit(self)
|
||||||
self.title.textChanged.connect(self.update_window_title)
|
self.title.textChanged.connect(self.update_window_title)
|
||||||
self.deduce_title_sort_button = QToolButton(self)
|
self.deduce_title_sort_button = QToolButton(self)
|
||||||
@ -119,7 +122,7 @@ class MetadataSingleDialogBase(ResizableDialog):
|
|||||||
self.deduce_title_sort_button.setWhatsThis(
|
self.deduce_title_sort_button.setWhatsThis(
|
||||||
self.deduce_title_sort_button.toolTip())
|
self.deduce_title_sort_button.toolTip())
|
||||||
self.title_sort = TitleSortEdit(self, self.title,
|
self.title_sort = TitleSortEdit(self, self.title,
|
||||||
self.deduce_title_sort_button)
|
self.deduce_title_sort_button, self.languages)
|
||||||
self.basic_metadata_widgets.extend([self.title, self.title_sort])
|
self.basic_metadata_widgets.extend([self.title, self.title_sort])
|
||||||
|
|
||||||
self.deduce_author_sort_button = b = QToolButton(self)
|
self.deduce_author_sort_button = b = QToolButton(self)
|
||||||
@ -203,9 +206,6 @@ class MetadataSingleDialogBase(ResizableDialog):
|
|||||||
self.publisher = PublisherEdit(self)
|
self.publisher = PublisherEdit(self)
|
||||||
self.basic_metadata_widgets.append(self.publisher)
|
self.basic_metadata_widgets.append(self.publisher)
|
||||||
|
|
||||||
self.languages = LanguagesEdit(self)
|
|
||||||
self.basic_metadata_widgets.append(self.languages)
|
|
||||||
|
|
||||||
self.timestamp = DateEdit(self)
|
self.timestamp = DateEdit(self)
|
||||||
self.pubdate = PubdateEdit(self)
|
self.pubdate = PubdateEdit(self)
|
||||||
self.basic_metadata_widgets.extend([self.timestamp, self.pubdate])
|
self.basic_metadata_widgets.extend([self.timestamp, self.pubdate])
|
||||||
@ -282,7 +282,6 @@ class MetadataSingleDialogBase(ResizableDialog):
|
|||||||
# Commented out as it doesn't play nice with Next, Prev buttons
|
# Commented out as it doesn't play nice with Next, Prev buttons
|
||||||
#self.fetch_metadata_button.setFocus(Qt.OtherFocusReason)
|
#self.fetch_metadata_button.setFocus(Qt.OtherFocusReason)
|
||||||
|
|
||||||
|
|
||||||
# Miscellaneous interaction methods {{{
|
# Miscellaneous interaction methods {{{
|
||||||
def update_window_title(self, *args):
|
def update_window_title(self, *args):
|
||||||
title = self.title.current_val
|
title = self.title.current_val
|
||||||
|
@ -209,6 +209,12 @@ class CybookOrizon(CybookOpus):
|
|||||||
name = 'Cybook Orizon'
|
name = 'Cybook Orizon'
|
||||||
id = 'cybook_orizon'
|
id = 'cybook_orizon'
|
||||||
|
|
||||||
|
class CybookOdyssey(CybookOpus):
|
||||||
|
|
||||||
|
name = 'Cybook Odyssey'
|
||||||
|
id = 'cybook_odyssey'
|
||||||
|
|
||||||
|
|
||||||
class PocketBook360(CybookOpus):
|
class PocketBook360(CybookOpus):
|
||||||
|
|
||||||
manufacturer = 'PocketBook'
|
manufacturer = 'PocketBook'
|
||||||
|
@ -332,8 +332,8 @@ def do_remove(db, ids):
|
|||||||
for y in x:
|
for y in x:
|
||||||
db.delete_book(y)
|
db.delete_book(y)
|
||||||
|
|
||||||
send_message()
|
|
||||||
db.clean()
|
db.clean()
|
||||||
|
send_message()
|
||||||
|
|
||||||
def remove_option_parser():
|
def remove_option_parser():
|
||||||
return get_parser(_(
|
return get_parser(_(
|
||||||
@ -358,7 +358,7 @@ def command_remove(args, dbpath):
|
|||||||
for x in args[1].split(','):
|
for x in args[1].split(','):
|
||||||
y = x.split('-')
|
y = x.split('-')
|
||||||
if len(y) > 1:
|
if len(y) > 1:
|
||||||
ids.append(range(int(y[0], int(y[1]))))
|
ids.extend(range(int(y[0]), int(y[1])))
|
||||||
else:
|
else:
|
||||||
ids.append(int(y[0]))
|
ids.append(int(y[0]))
|
||||||
|
|
||||||
|
@ -15,7 +15,7 @@ from math import ceil
|
|||||||
|
|
||||||
from calibre import prints
|
from calibre import prints
|
||||||
from calibre.ebooks.metadata import (title_sort, author_to_author_sort,
|
from calibre.ebooks.metadata import (title_sort, author_to_author_sort,
|
||||||
string_to_authors, authors_to_string)
|
string_to_authors, authors_to_string, get_title_sort_pat)
|
||||||
from calibre.ebooks.metadata.opf2 import metadata_to_opf
|
from calibre.ebooks.metadata.opf2 import metadata_to_opf
|
||||||
from calibre.library.database import LibraryDatabase
|
from calibre.library.database import LibraryDatabase
|
||||||
from calibre.library.field_metadata import FieldMetadata, TagsIcons
|
from calibre.library.field_metadata import FieldMetadata, TagsIcons
|
||||||
@ -1004,10 +1004,11 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
def find_identical_books(self, mi):
|
def find_identical_books(self, mi):
|
||||||
fuzzy_title_patterns = [(re.compile(pat, re.IGNORECASE), repl) for pat, repl in
|
fuzzy_title_patterns = [(re.compile(pat, re.IGNORECASE) if
|
||||||
|
isinstance(pat, basestring) else pat, repl) for pat, repl in
|
||||||
[
|
[
|
||||||
(r'[\[\](){}<>\'";,:#]', ''),
|
(r'[\[\](){}<>\'";,:#]', ''),
|
||||||
(tweaks.get('title_sort_articles', r'^(a|the|an)\s+'), ''),
|
(get_title_sort_pat(), ''),
|
||||||
(r'[-._]', ' '),
|
(r'[-._]', ' '),
|
||||||
(r'\s+', ' ')
|
(r'\s+', ' ')
|
||||||
]
|
]
|
||||||
|
@ -117,8 +117,8 @@ How can I help get my device supported in |app|?
|
|||||||
If your device appears as a USB disk to the operating system, adding support for it to |app| is very easy.
|
If your device appears as a USB disk to the operating system, adding support for it to |app| is very easy.
|
||||||
We just need some information from you:
|
We just need some information from you:
|
||||||
|
|
||||||
* What ebook formats does your device support?
|
* Complete list of ebook formats that your device supports.
|
||||||
* Is there a special directory on the device in which all ebook files should be placed?
|
* Is there a special directory on the device in which all ebook files should be placed? Also does the device detect files placed in sub directories?
|
||||||
* We also need information about your device that |app| will collect automatically. First, if your
|
* We also need information about your device that |app| will collect automatically. First, if your
|
||||||
device supports SD cards, insert them. Then connect your device to the computer. In calibre go to :guilabel:`Preferences->Advanced->Miscellaneous`
|
device supports SD cards, insert them. Then connect your device to the computer. In calibre go to :guilabel:`Preferences->Advanced->Miscellaneous`
|
||||||
and click the "Debug device detection" button. This will create some debug output. Copy it to a file
|
and click the "Debug device detection" button. This will create some debug output. Copy it to a file
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -339,6 +339,7 @@ class TemplateFormatter(string.Formatter):
|
|||||||
########## a formatter that throws exceptions ############
|
########## a formatter that throws exceptions ############
|
||||||
|
|
||||||
def unsafe_format(self, fmt, kwargs, book):
|
def unsafe_format(self, fmt, kwargs, book):
|
||||||
|
self.column_name = self.template_cache = None
|
||||||
self.kwargs = kwargs
|
self.kwargs = kwargs
|
||||||
self.book = book
|
self.book = book
|
||||||
self.composite_values = {}
|
self.composite_values = {}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user