Merge from trunk

This commit is contained in:
Charles Haley 2012-06-19 13:14:31 +02:00
commit 916bb44179
14 changed files with 1084 additions and 65 deletions

595
recipes/bbc_brasil.recipe Normal file
View File

@ -0,0 +1,595 @@
##
## Title: BBC News, Sport, and Blog Calibre Recipe
## Contact: mattst - jmstanfield@gmail.com
##
## License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
## Copyright: mattst - jmstanfield@gmail.com
##
## Written: November 2011
## Last Edited: 2011-11-19
##
__license__ = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html'
__copyright__ = 'mattst - jmstanfield@gmail.com'
'''
BBC News, Sport, and Blog Calibre Recipe
'''
# Import the regular expressions module.
import re
# Import the BasicNewsRecipe class which this class extends.
from calibre.web.feeds.recipes import BasicNewsRecipe
class BBCBrasilRecipe(BasicNewsRecipe):
#
# **** IMPORTANT USERS READ ME ****
#
# First select the feeds you want then scroll down below the feeds list
# and select the values you want for the other user preferences, like
# oldest_article and such like.
#
#
# Select the BBC rss feeds which you want in your ebook.
# Selected feeds have NO '#' at their start, de-selected feeds begin with a '#'.
#
# Eg. ("News Home", "http://feeds.bbci.co.uk/... - include feed.
# Eg. #("News Home", "http://feeds.bbci.co.uk/... - do not include feed.
#
# There are 68 feeds below which constitute the bulk of the available rss
# feeds on the BBC web site. These include 5 blogs by editors and
# correspondents, 16 sports feeds, 15 'sub' regional feeds (Eg. North West
# Wales, Scotland Business), and 7 Welsh language feeds.
#
# Some of the feeds are low volume (Eg. blogs), or very low volume (Eg. Click)
# so if "oldest_article = 1.5" (only articles published in the last 36 hours)
# you may get some 'empty feeds' which will not then be included in the ebook.
#
# The 15 feeds currently selected below are simply my default ones.
#
# Note: With all 68 feeds selected, oldest_article set to 2,
# max_articles_per_feed set to 100, and simultaneous_downloads set to 10,
# the ebook creation took 29 minutes on my speedy 100 mbps net connection,
# fairly high-end desktop PC running Linux (Ubuntu Lucid-Lynx).
# More realistically with 15 feeds selected, oldest_article set to 1.5,
# max_articles_per_feed set to 100, and simultaneous_downloads set to 20,
# it took 6 minutes. If that's too slow increase 'simultaneous_downloads'.
#
# Select / de-select the feeds you want in your ebook.
#
feeds = [
(u'Primeira P\xe1gina', u'http://www.bbc.co.uk/portuguese/index.xml'),
(u'\xdaltimas Not\xedcias', u'http://www.bbc.co.uk/portuguese/ultimas_noticias/index.xml'),
(u'Internacional', u'http://www.bbc.co.uk/portuguese/topicos/internacional/index.xml'),
(u'Brasil', u'http://www.bbc.co.uk/portuguese/topicos/brasil/index.xml'),
(u'Am\xe9rica Latina', u'http://www.bbc.co.uk/portuguese/topicos/america_latina/index.xml'),
(u'Economia', u'http://www.bbc.co.uk/portuguese/topicos/economia/index.xml'),
(u'Sa\xfade', u'http://www.bbc.co.uk/portuguese/topicos/saude/index.xml'),
(u'Ci\xeancia e Tecnologia', u'http://www.bbc.co.uk/portuguese/topicos/ciencia_e_tecnologia/index.xml'),
(u'Cultura', u'http://www.bbc.co.uk/portuguese/topicos/cultura/index.xml'),
(u'V\xeddeos e Fotos', u'http://www.bbc.co.uk/portuguese/videos_e_fotos/index.xml'),
(u'Especiais', u'http://www.bbc.co.uk/portuguese/especiais/index.xml')
]
# **** SELECT YOUR USER PREFERENCES ****
# Title to use for the ebook.
#
title = 'BBC Brasil'
# A brief description for the ebook.
#
description = u'Not\xedcias do Brasil e do mundo pela British Broadcasting Corporation'
# The max number of articles which may be downloaded from each feed.
# I've never seen more than about 70 articles in a single feed in the
# BBC feeds.
#
max_articles_per_feed = 100
# The max age of articles which may be downloaded from each feed. This is
# specified in days - note fractions of days are allowed, Eg. 2.5 (2 and a
# half days). My default of 1.5 days is the last 36 hours, the point at
# which I've decided 'news' becomes 'old news', but be warned this is not
# so good for the blogs, technology, magazine, etc., and sports feeds.
# You may wish to extend this to 2-5 but watch out ebook creation time will
# increase as well. Setting this to 30 will get everything (AFAICT) as long
# as max_articles_per_feed remains set high (except for 'Click' which is
# v. low volume and its currently oldest article is 4th Feb 2011).
#
oldest_article = 1.5
# Number of simultaneous downloads. 20 is consistently working fine on the
# BBC News feeds with no problems. Speeds things up from the default of 5.
# If you have a lot of feeds and/or have increased oldest_article above 2
# then you may wish to try increasing simultaneous_downloads to 25-30,
# Or, of course, if you are in a hurry. [I've not tried beyond 20.]
#
simultaneous_downloads = 20
# Timeout for fetching files from the server in seconds. The default of
# 120 seconds, seems somewhat excessive.
#
timeout = 30
# The format string for the date shown on the ebook's first page.
# List of all values: http://docs.python.org/library/time.html
# Default in news.py has a leading space so that's mirrored here.
# As with 'feeds' select/de-select by adding/removing the initial '#',
# only one timefmt should be selected, here's a few to choose from.
#
timefmt = ' [%a, %d %b %Y]' # [Fri, 14 Nov 2011] (Calibre default)
#timefmt = ' [%a, %d %b %Y %H:%M]' # [Fri, 14 Nov 2011 18:30]
#timefmt = ' [%a, %d %b %Y %I:%M %p]' # [Fri, 14 Nov 2011 06:30 PM]
#timefmt = ' [%d %b %Y]' # [14 Nov 2011]
#timefmt = ' [%d %b %Y %H:%M]' # [14 Nov 2011 18.30]
#timefmt = ' [%Y-%m-%d]' # [2011-11-14]
#timefmt = ' [%Y-%m-%d-%H-%M]' # [2011-11-14-18-30]
#
# **** IMPORTANT ****
#
# DO NOT EDIT BELOW HERE UNLESS YOU KNOW WHAT YOU ARE DOING.
#
# DO NOT EDIT BELOW HERE UNLESS YOU KNOW WHAT YOU ARE DOING.
#
# I MEAN IT, YES I DO, ABSOLUTELY, AT YOUR OWN RISK. :)
#
# **** IMPORTANT ****
#
# Author of this recipe.
__author__ = 'claviola'
# Specify Brazilian Portuguese as the language of the RSS feeds. Every feed
# in 'feeds' is served from bbc.co.uk/portuguese, so the ebook metadata must
# not claim the content is British English.
language = 'pt_BR'
# Set tags.
tags = 'news, sport, blog'
# Set publisher and publication type.
publisher = 'BBC'
publication_type = 'newspaper'
# Disable stylesheets from site.
no_stylesheets = True
# Specifies an override encoding for sites that have an incorrect charset
# specified. Default of 'None' says to auto-detect. Some other BBC recipes
# use 'utf8', which works fine (so use that if necessary) but auto-detecting
# with None is working fine, so stick with that for robustness.
encoding = None
# Sets whether a feed has full articles embedded in it. The BBC feeds do not.
use_embedded_content = False
# Removes empty feeds - why keep them!?
remove_empty_feeds = True
# Create a custom title which fits nicely in the Kindle title list.
# Requires "import time" above class declaration, and replacing
# title with custom_title in conversion_options (right column only).
# Example of string below: "BBC News - 14 Nov 2011"
#
# custom_title = "BBC News - " + time.strftime('%d %b %Y')
'''
# Conversion options for advanced users, but don't forget to comment out the
# current conversion_options below. Avoid setting 'linearize_tables' as that
# plays havoc with the 'old style' table based pages.
#
conversion_options = { 'title' : title,
'comments' : description,
'tags' : tags,
'language' : language,
'publisher' : publisher,
'authors' : publisher,
'smarten_punctuation' : True
}
'''
conversion_options = { 'smarten_punctuation' : True }
# Specify extra CSS - overrides ALL other CSS (IE. Added last).
extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \
.introduction, .first { font-weight: bold; } \
.cross-head { font-weight: bold; font-size: 125%; } \
.cap, .caption { display: block; font-size: 80%; font-style: italic; } \
.cap, .caption, .caption img, .caption span { display: block; text-align: center; margin: 5px auto; } \
.byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position, \
.correspondent-portrait img, .byline-lead-in, .name, .role, .bbc-role { display: block; \
text-align: center; font-size: 80%; font-style: italic; margin: 1px auto; } \
.story-date, .published, .datestamp { font-size: 80%; } \
table { width: 100%; } \
td img { display: block; margin: 5px auto; } \
ul { padding-top: 10px; } \
ol { padding-top: 10px; } \
li { padding-top: 5px; padding-bottom: 5px; } \
h1 { text-align: center; font-size: 175%; font-weight: bold; } \
h2 { text-align: center; font-size: 150%; font-weight: bold; } \
h3 { text-align: center; font-size: 125%; font-weight: bold; } \
h4, h5, h6 { text-align: center; font-size: 100%; font-weight: bold; }'
# Remove various tag attributes to improve the look of the ebook pages.
remove_attributes = [ 'border', 'cellspacing', 'align', 'cellpadding', 'colspan',
'valign', 'vspace', 'hspace', 'alt', 'width', 'height' ]
# Remove the (admittedly rarely used) line breaks, "<br />", which sometimes
# cause a section of the ebook to start in an unsightly fashion or, more
# frequently, a "<br />" will muck up the formatting of a correspondent's byline.
# "<br />" and "<br clear/>" are far more frequently used on the table formatted
# style of pages, and really spoil the look of the ebook pages.
# Each entry is (compiled pattern, replacement callable); every match in the
# downloaded HTML is replaced with the empty string.
#   1. Self-closing line breaks: "<br/>", "<br />".
#   2. Line breaks carrying a clear attribute: "<br clear/>",
#      '<br clear="all" />'. The attribute text is matched with [^>]* so the
#      match stops at the end of that one tag; a greedy '.*' would run on to
#      the last "/>" on the same line and delete article text with it.
preprocess_regexps = [(re.compile(r'<br[ ]*/>', re.IGNORECASE), lambda m: ''),
                      (re.compile(r'<br[ ]*clear[^>]*/>', re.IGNORECASE), lambda m: '')]
# Create regular expressions for tag keeping and removal to make the matches more
# robust against minor changes and errors in the HTML, Eg. double spaces, leading
# and trailing spaces, missing hyphens, and such like.
# Python regular expression ('re' class) page: http://docs.python.org/library/re.html
# ***************************************
# Regular expressions for keep_only_tags:
# ***************************************
# The BBC News HTML pages use variants of 'storybody' to denote the section of a HTML
# page which contains the main text of the article. Match storybody variants: 'storybody',
# 'story-body', 'story body','storybody ', etc.
storybody_reg_exp = '^.*story[_ -]*body.*$'
# The BBC sport and 'newsbeat' (features) HTML pages use 'blq_content' to hold the title
# and published date. This is one level above the usual news pages which have the title
# and date within 'story-body'. This is annoying since 'blq_content' must also be kept,
# resulting in a lot of extra things to be removed by remove_tags.
blq_content_reg_exp = '^.*blq[_ -]*content.*$'
# The BBC has an alternative page design structure, which I suspect is an out-of-date
# design but which is still used in some articles, Eg. 'Click' (technology), 'FastTrack'
# (travel), and in some sport pages. These alternative pages are table based (which is
# why I think they are an out-of-date design) and account for -I'm guesstimating- less
# than 1% of all articles. They use a table class 'storycontent' to hold the article
# and like blq_content (above) have required lots of extra removal by remove_tags.
story_content_reg_exp = '^.*story[_ -]*content.*$'
# Keep the sections of the HTML which match the list below. The HTML page created by
# Calibre will fill <body> with those sections which are matched. Note that the
# blq_content_reg_exp must be listed before storybody_reg_exp in keep_only_tags due to
# it being the parent of storybody_reg_exp, that is to say the div class/id 'story-body'
# will be inside div class/id 'blq_content' in the HTML (if 'blq_content' is there at
# all). If they are the other way around in keep_only_tags then blq_content_reg_exp
# will end up being discarded.
keep_only_tags = [ dict(name='table', attrs={'class':re.compile(story_content_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(blq_content_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'id':re.compile(blq_content_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(storybody_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'id':re.compile(storybody_reg_exp, re.IGNORECASE)}) ]
# ************************************
# Regular expressions for remove_tags:
# ************************************
# Regular expression to remove share-help and variant tags. The share-help class
# is used by the site for a variety of 'sharing' type links, Eg. Facebook, delicious,
# twitter, email. Removed to avoid page clutter.
share_help_reg_exp = '^.*share[_ -]*help.*$'
# Regular expression to remove embedded-hyper and variant tags. This class is used to
# display links to other BBC News articles on the same/similar subject.
embedded_hyper_reg_exp = '^.*embed*ed[_ -]*hyper.*$'
# Regular expression to remove hypertabs and variant tags. This class is used to
# display a tab bar at the top of an article which allows the user to switch to
# an article (viewed on the same page) providing further info., 'in depth' analysis,
# an editorial, a correspondent's blog entry, and such like. The ability to handle
# a tab bar of this nature is currently beyond the scope of this recipe and
# possibly of Calibre itself (not sure about that - TO DO - check!).
hypertabs_reg_exp = '^.*hyper[_ -]*tabs.*$'
# Regular expression to remove story-feature and variant tags. Eg. 'story-feature',
# 'story-feature related narrow', 'story-feature wide', 'story-feature narrow'.
# This class is used to add additional info. boxes, or small lists, outside of
# the main story. TO DO: Work out a way to incorporate these neatly.
story_feature_reg_exp = '^.*story[_ -]*feature.*$'
# Regular expression to remove video and variant tags, Eg. 'videoInStoryB',
# 'videoInStoryC'. This class is used to embed video.
video_reg_exp = '^.*video.*$'
# Regular expression to remove audio and variant tags, Eg. 'audioInStoryD'.
# This class is used to embed audio.
audio_reg_exp = '^.*audio.*$'
# Regular expression to remove pictureGallery and variant tags, Eg. 'pictureGallery'.
# This class is used to embed a photo slideshow. See also 'slideshow' below.
picture_gallery_reg_exp = '^.*picture.*$'
# Regular expression to remove slideshow and variant tags, Eg. 'dslideshow-enclosure'.
# This class is used to embed a slideshow (not necessarily photo) but both
# 'slideshow' and 'pictureGallery' are used for slideshows.
slideshow_reg_exp = '^.*slide[_ -]*show.*$'
# Regular expression to remove social-links and variant tags. This class is used to
# display links to a BBC bloggers main page, used in various columnist's blogs
# (Eg. Nick Robinson, Robert Preston).
social_links_reg_exp = '^.*social[_ -]*links.*$'
# Regular expression to remove quote and (multi) variant tags, Eg. 'quote',
# 'endquote', 'quote-credit', 'quote-credit-title', etc. These are usually
# removed by 'story-feature' removal (as they are usually within them), but
# not always. The quotation removed is always (AFAICT) in the article text
# as well but a 2nd copy is placed in a quote tag to draw attention to it.
# The quote class tags may or may not appear in div's.
quote_reg_exp = '^.*quote.*$'
# Regular expression to remove hidden and variant tags, Eg. 'hidden'.
# The purpose of these is unclear, they seem to be an internal link to a
# section within the article, but the text of the link (Eg. 'Continue reading
# the main story') never seems to be displayed anyway. Removed to avoid clutter.
# The hidden class tags may or may not appear in div's.
hidden_reg_exp = '^.*hidden.*$'
# Regular expression to remove comment and variant tags, Eg. 'comment-introduction'.
# Used on the site to display text about registered users entering comments.
comment_reg_exp = '^.*comment.*$'
# Regular expression to remove form and variant tags, Eg. 'comment-form'.
# Used on the site to allow registered BBC users to fill in forms, typically
# for entering comments about an article.
form_reg_exp = '^.*form.*$'
# Extra things to remove due to the addition of 'blq_content' in keep_only_tags.
#<div class="story-actions"> Used on sports pages for 'email' and 'print'.
story_actions_reg_exp = '^.*story[_ -]*actions.*$'
#<div class="bookmark-list"> Used on sports pages instead of 'share-help' (for
# social networking links).
bookmark_list_reg_exp = '^.*bookmark[_ -]*list.*$'
#<div id="secondary-content" class="content-group">
# NOTE: Don't remove class="content-group" that is needed.
# Used on sports pages to link to 'similar stories'.
secondary_content_reg_exp = '^.*secondary[_ -]*content.*$'
#<div id="featured-content" class="content-group">
# NOTE: Don't remove class="content-group" that is needed.
# Used on sports pages to link to pages like 'tables', 'fixtures', etc.
featured_content_reg_exp = '^.*featured[_ -]*content.*$'
#<div id="navigation">
# Used on sports pages to link to pages like 'tables', 'fixtures', etc.
# Used sometimes instead of "featured-content" above.
navigation_reg_exp = '^.*navigation.*$'
#<a class="skip" href="#blq-container-inner">Skip to top</a>
# Used on sports pages to link to the top of the page.
skip_reg_exp = '^.*skip.*$'
# Extra things to remove due to the addition of 'storycontent' in keep_only_tags,
# which are the alternative table design based pages. The purpose of some of these
# is not entirely clear from the pages (which are a total mess!).
# Remove mapping based tags, Eg. <map id="world_map">
# The dynamic maps don't seem to work during ebook creation. TO DO: Investigate.
map_reg_exp = '^.*map.*$'
# Remove social bookmarking variation, called 'socialBookMarks'.
social_bookmarks_reg_exp = '^.*social[_ -]*bookmarks.*$'
# Remove page navigation tools, like 'search', 'email', 'print', called 'blq-mast'.
blq_mast_reg_exp = '^.*blq[_ -]*mast.*$'
# Remove 'sharesb', I think this is a generic 'sharing' class. It seems to appear
# alongside 'socialBookMarks' whenever that appears. I am removing it as well
# under the assumption that it can appear alone as well.
sharesb_reg_exp = '^.*sharesb.*$'
# Remove class 'o'. The worst named user created css class of all time. The creator
# should immediately be fired. I've seen it used to hold nothing at all but with
# 20 or so empty lines in it. Also to hold a single link to another article.
# Whatever it was designed to do it is not wanted by this recipe. Exact match only.
o_reg_exp = '^o$'
# Remove 'promotopbg' and 'promobottombg', link lists. Have decided to
# use two reg expressions to make removing this (and variants) robust.
promo_top_reg_exp = '^.*promotopbg.*$'
promo_bottom_reg_exp = '^.*promobottombg.*$'
# Remove 'nlp', provides heading for link lists. Requires an exact match due to
# risk of matching those letters in something needed, unless I see a variation
# of 'nlp' used at a later date.
nlp_reg_exp = '^nlp$'
# Remove 'mva', provides embedded floating content of various types. Variant 'mvb'
# has also now been seen. Requires an exact match of 'mva' or 'mvb' due to risk of
# matching those letters in something needed.
mva_or_mvb_reg_exp = '^mv[ab]$'
# Remove 'mvtb', seems to be page navigation tools, like 'blq-mast'.
mvtb_reg_exp = '^mvtb$'
# Remove 'blq-toplink', class to provide a link to the top of the page.
blq_toplink_reg_exp = '^.*blq[_ -]*top[_ -]*link.*$'
# Remove 'products and services' links, Eg. desktop tools, alerts, and so on.
# Eg. Class="servicev4 ukfs_services" - what a mess of a name. Have decided to
# use two reg expressions to make removing this (and variants) robust.
prods_services_01_reg_exp = '^.*servicev4.*$'
prods_services_02_reg_exp = '^.*ukfs[_ -]*services.*$'
# Remove -what I think is- some kind of navigation tools helper class, though I am
# not sure, it's called: 'blq-rst blq-new-nav'. What I do know is it pops up
# frequently and it is not wanted. Have decided to use two reg expressions to make
# removing this (and variants) robust.
blq_misc_01_reg_exp = '^.*blq[_ -]*rst.*$'
blq_misc_02_reg_exp = '^.*blq[_ -]*new[_ -]*nav.*$'
# Remove 'puffbox' - this may only appear inside 'storyextra', so it may not
# need removing - I have no clue what it does other than it contains links.
# Whatever it is - it is not part of the article and is not wanted.
puffbox_reg_exp = '^.*puffbox.*$'
# Remove 'sibtbg' and 'sibtbgf' - some kind of table formatting classes.
sibtbg_reg_exp = '^.*sibtbg.*$'
# Remove 'storyextra' - links to relevant articles and external sites.
storyextra_reg_exp = '^.*story[_ -]*extra.*$'
remove_tags = [ dict(name='div', attrs={'class':re.compile(story_feature_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(share_help_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(embedded_hyper_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(hypertabs_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(video_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(audio_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(picture_gallery_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(slideshow_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(quote_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(hidden_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(comment_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(story_actions_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(bookmark_list_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'id':re.compile(secondary_content_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'id':re.compile(featured_content_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'id':re.compile(navigation_reg_exp, re.IGNORECASE)}),
dict(name='form', attrs={'id':re.compile(form_reg_exp, re.IGNORECASE)}),
dict(attrs={'class':re.compile(quote_reg_exp, re.IGNORECASE)}),
dict(attrs={'class':re.compile(hidden_reg_exp, re.IGNORECASE)}),
dict(attrs={'class':re.compile(social_links_reg_exp, re.IGNORECASE)}),
dict(attrs={'class':re.compile(comment_reg_exp, re.IGNORECASE)}),
dict(attrs={'class':re.compile(skip_reg_exp, re.IGNORECASE)}),
dict(name='map', attrs={'id':re.compile(map_reg_exp, re.IGNORECASE)}),
dict(name='map', attrs={'name':re.compile(map_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'id':re.compile(social_bookmarks_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'id':re.compile(blq_mast_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(sharesb_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(o_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(promo_top_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(promo_bottom_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(nlp_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(mva_or_mvb_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(mvtb_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(blq_toplink_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(prods_services_01_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(prods_services_02_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(blq_misc_01_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(blq_misc_02_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':re.compile(puffbox_reg_exp, re.IGNORECASE)}),
dict(attrs={'class':re.compile(sibtbg_reg_exp, re.IGNORECASE)}),
dict(attrs={'class':re.compile(storyextra_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class':'tools-container'}),
dict(name='div', attrs={'class':'tools-container-end'}),
dict(name='div', attrs={'class':'g-block story-body contextual-links'}),
dict(name='div', attrs={'class':' g-w11 sidebar'})
]
# Uses url to create and return the 'printer friendly' version of the url.
# In other words the 'print this page' address of the page.
#
# There are 3 types of urls used in the BBC site's rss feeds. There is just
# 1 type for the standard news while there are 2 used for sports feed urls.
# Note: Sports urls are linked from regular news feeds (Eg. 'News Home') when
# there is a major story of interest to 'everyone'. So even if no BBC sports
# feeds are added to 'feeds' the logic of this method is still needed to avoid
# blank / missing / empty articles which have an index title and then no body.
def print_version(self, url):
    """Return the 'print this page' address for an article url.

    The rss redirect prefix is stripped from the url and a ``print=true``
    query parameter is added.

    url -- article address as it appears in the rss feed.

    Returns the printer friendly url as a string.
    """
    # Sports urls of type 01 use the short "go/rss/-/" prefix. Every other
    # url (sports type 02 and regular news) uses "go/rss/int/news/-/".
    if "go/rss/-/sport1/" in url:
        temp_url = url.replace("go/rss/-/", "")
    else:
        temp_url = url.replace("go/rss/int/news/-/", "")
    # Extend an existing query string with '&' rather than starting a second
    # one with '?', which would produce a malformed address.
    separator = "&" if "?" in temp_url else "?"
    return temp_url + separator + "print=true"
# Remove articles in feeds based on a string in the article title or url.
#
# Code logic written by: Starson17 - posted in: "Recipes - Re-usable code"
# thread, in post with title: "Remove articles from feed", see url:
# http://www.mobileread.com/forums/showpost.php?p=1165462&postcount=6
# Many thanks and all credit to Starson17.
#
# Starson17's code has obviously been altered to suit my requirements.
def parse_feeds(self):
    """Parse the feeds, then drop articles that cannot be rendered as text.

    The parent class builds the feed list; every article whose title or url
    marks it as video, audio, a picture gallery, live commentary, a quiz or
    a cricket scorecard is then removed from its feed.

    Returns the (filtered) list of feeds produced by the parent method.
    """
    # Title phrases matched in any case. 'LIVE - ' (with the dash) is used
    # instead of plain 'live' so that ordinary articles which merely contain
    # the word are kept. 'QUIZ OF THE' also covers monthly/yearly variants.
    any_case_phrases = ('IN PICTURES', 'YOUR PICTURES', 'SPORTSDAY LIVE',
                        'LIVE - ', 'QUIZ OF THE')

    def is_unwanted(article):
        headline = article.title
        # Video/audio-only items use an upper case 'VIDEO' / 'AUDIO' prefix.
        # Matching upper case only keeps titles such as 'Video game banned'
        # and 'Hi-Def audio...'.
        if 'VIDEO' in headline or 'AUDIO' in headline:
            return True
        shouted = headline.upper()
        if any(phrase in shouted for phrase in any_case_phrases):
            return True
        # Sports pages that hold nothing but a cricket scorecard table.
        return 'scorecards' in article.url

    # Call parent's method.
    parsed_feeds = BasicNewsRecipe.parse_feeds(self)
    for feed in parsed_feeds:
        # Walk a copy, because matching articles are removed from the
        # live list while looping.
        for article in feed.articles[:]:
            if is_unwanted(article):
                feed.articles.remove(article)
    return parsed_feeds
# End of class and file.

Binary file not shown.

View File

@ -0,0 +1,11 @@
<!DOCTYPE html>
<html>
<head>
<title>blank</title>
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
</head>
<body>
<div>&nbsp;</div>
</body>
</html>

View File

@ -34,6 +34,7 @@ isfrozen = hasattr(sys, 'frozen')
isunix = isosx or islinux isunix = isosx or islinux
isportable = os.environ.get('CALIBRE_PORTABLE_BUILD', None) is not None isportable = os.environ.get('CALIBRE_PORTABLE_BUILD', None) is not None
ispy3 = sys.version_info.major > 2 ispy3 = sys.version_info.major > 2
isxp = iswindows and sys.getwindowsversion().major < 6
try: try:
preferred_encoding = locale.getpreferredencoding() preferred_encoding = locale.getpreferredencoding()

View File

@ -224,7 +224,18 @@ def parse_index_record(table, data, control_byte_count, tags, codec,
for j in xrange(entry_count): for j in xrange(entry_count):
start, end = idx_positions[j:j+2] start, end = idx_positions[j:j+2]
rec = data[start:end] rec = data[start:end]
ident, consumed = decode_string(rec, codec=codec, ordt_map=ordt_map) # Sometimes (in the guide table if the type attribute has non ascii
# values) the ident is UTF-16 encoded. Try to handle that.
try:
ident, consumed = decode_string(rec, codec=codec, ordt_map=ordt_map)
except UnicodeDecodeError:
ident, consumed = decode_string(rec, codec='utf-16', ordt_map=ordt_map)
# Embedded NUL characters suggest the ident was really UTF-16 encoded:
# decode it again as UTF-16 and, if that fails, fall back to stripping the
# NULs from the already decoded text.
if u'\x00' in ident:
    try:
        ident, consumed = decode_string(rec, codec='utf-16',
                ordt_map=ordt_map)
    except UnicodeDecodeError:
        # The literal must be u'\x00' (a NUL). With the u prefix inside the
        # quotes ('u\x00') the search string is "u" followed by NUL, so the
        # NULs were never removed.
        ident = ident.replace(u'\x00', u'')
rec = rec[consumed:] rec = rec[consumed:]
tag_map = get_tag_map(control_byte_count, tags, rec, strict=strict) tag_map = get_tag_map(control_byte_count, tags, rec, strict=strict)
table[ident] = tag_map table[ident] = tag_map

View File

@ -0,0 +1,250 @@
#!/usr/bin/env coffee
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
###
Copyright 2012, Kovid Goyal <kovid@kovidgoyal.net>
Released under the GPLv3 License
###
log = (args...) -> # {{{
    # Join all arguments with single spaces and print the result: to the
    # browser console when window.console.log exists, otherwise to
    # process.stdout when that exists. Nothing is printed if neither does.
    return unless args
    message = args.join(' ')
    if window?.console?.log
        window.console.log(message)
    else if process?.stdout?.write
        process.stdout.write(message + '\n')
# }}}
body_height = () -> # {{{
    # Height of the document: the largest of the scroll, offset and client
    # heights reported by document.body and document.documentElement.
    # Returns 0 when either of the two elements is missing.
    body = document.body
    root = document.documentElement
    return 0 unless body? and root?
    candidates = [
        body.scrollHeight, root.scrollHeight, body.offsetHeight,
        root.offsetHeight, body.clientHeight, root.clientHeight
    ]
    return Math.max(candidates...)
# }}}
window_scroll_pos = (win=window) -> # {{{
    # Return the scroll offsets as [x, y].
    #
    # win.pageXOffset / win.pageYOffset are used when pageXOffset is a
    # number. Otherwise (IE < 9) the offsets are read from document.body or,
    # failing that, document.documentElement, whichever reports a non-zero
    # scroll position.
    if typeof(win.pageXOffset) == 'number'
        x = win.pageXOffset
        y = win.pageYOffset
    else # IE < 9
        # Start from 0: when neither element reports a non-zero scroll
        # position the page is unscrolled, and callers add these values to
        # coordinates, so they must be numbers and never undefined.
        x = 0
        y = 0
        if document.body and ( document.body.scrollLeft or document.body.scrollTop )
            x = document.body.scrollLeft
            y = document.body.scrollTop
        else if document.documentElement and ( document.documentElement.scrollLeft or document.documentElement.scrollTop)
            y = document.documentElement.scrollTop
            x = document.documentElement.scrollLeft
    return [x, y]
# }}}
viewport_to_document = (x, y, doc=window?.document) -> # {{{
    # Convert viewport coordinates (x, y), given relative to the document
    # doc, into coordinates relative to the top level document.
    # Returns [x, y].
    while doc != window.document
        # We are in a frame: shift by the frame element's position inside
        # its owner document, then continue from that owner document.
        frame_elem = doc.defaultView.frameElement
        bounds = frame_elem.getBoundingClientRect()
        x += bounds.left
        y += bounds.top
        doc = frame_elem.ownerDocument
    # Finally add the scroll offset of the window showing the document.
    [scroll_x, scroll_y] = window_scroll_pos(doc.defaultView)
    return [x + scroll_x, y + scroll_y]
# }}}
absleft = (elem) -> # {{{
    # Left edge of elem's bounding rectangle, converted from viewport to
    # document coordinates via viewport_to_document.
    bounds = elem.getBoundingClientRect()
    position = viewport_to_document(bounds.left, 0, elem.ownerDocument)
    return position[0]
# }}}
class PagedDisplay
    ###
    This class is a namespace to expose functions via the
    window.paged_display object. The most important functions are:

    layout(): causes the currently loaded document to be laid out in columns.
    ###

    constructor: () ->
        # Start with the default geometry. The remaining fields only get
        # meaningful values once layout() has been called.
        this.set_geometry()
        this.page_width = 0
        this.screen_width = 0
        this.in_paged_mode = false
        this.current_margin_side = 0

    set_geometry: (cols_per_screen=2, margin_top=20, margin_side=40, margin_bottom=20) ->
        # Record the number of columns per screen and the margins. The
        # values are only read by layout(), so call it again to apply them.
        this.margin_top = margin_top
        this.margin_side = margin_side
        this.margin_bottom = margin_bottom
        this.cols_per_screen = cols_per_screen

    layout: () ->
        # Lay the body out as a series of columns using the -webkit-column-*
        # properties and switch this object into paged mode. Returns the
        # side margin (in px) that was actually used.
        #
        # NOTE: never assign to a variable named body_height in here.
        # CoffeeScript has no variable shadowing, so doing that replaces
        # the module level body_height() function.
        ww = window.innerWidth
        n = this.cols_per_screen

        # Calculate the column width so that cols_per_screen columns fit in the
        # window in such a way the right margin of the last column is <=
        # side_margin (it may be less if the window width is not a
        # multiple of n*(col_width+2*side_margin)).
        adjust = ww - Math.floor(ww/n)*n

        # Ensure that the margins are large enough that the adjustment does not
        # cause them to become negative semidefinite
        sm = Math.max(2*adjust, this.margin_side)

        # Minimum column width, for the cases when the window is too
        # narrow
        col_width = Math.max(100, ((ww - adjust)/n) - 2*sm)
        this.page_width = col_width + 2*sm
        this.screen_width = this.page_width * this.cols_per_screen

        body_style = window.getComputedStyle(document.body)
        fgcolor = body_style.getPropertyValue('color')
        bs = document.body.style

        bs.setProperty('-webkit-column-gap', (2*sm)+'px')
        bs.setProperty('-webkit-column-width', col_width+'px')
        bs.setProperty('-webkit-column-rule-color', fgcolor)
        bs.setProperty('overflow', 'visible')
        bs.setProperty('height', 'auto')
        bs.setProperty('width', 'auto')
        bs.setProperty('margin-top', this.margin_top+'px')
        bs.setProperty('margin-bottom', this.margin_bottom+'px')
        bs.setProperty('margin-left', sm+'px')
        bs.setProperty('margin-right', sm+'px')
        for edge in ['left', 'right', 'top', 'bottom']
            bs.setProperty('padding-'+edge, '0px')
            bs.setProperty('border-'+edge+'-width', '0px')
        bs.setProperty('min-width', '0')
        bs.setProperty('max-width', 'none')
        bs.setProperty('min-height', '0')
        bs.setProperty('max-height', 'none')

        # Ensure that the top margin is correct, otherwise for some documents,
        # webkit lays out the body with a lot of space on top
        brect = document.body.getBoundingClientRect()
        if brect.top > this.margin_top
            bs.setProperty('margin-top', (this.margin_top - brect.top)+'px')
            brect = document.body.getBoundingClientRect()
        this.in_paged_mode = true
        this.current_margin_side = sm
        return sm

    scroll_to_pos: (frac) ->
        # Scroll to the position represented by frac (number between 0 and 1)
        xpos = Math.floor(document.body.scrollWidth * frac)
        this.scroll_to_xpos(xpos)

    scroll_to_xpos: (xpos) ->
        # Scroll so that the column containing xpos is the left most column in
        # the viewport
        if typeof(xpos) != 'number' or not isFinite(xpos)
            log(xpos, 'is not a number, cannot scroll to it!')
            return
        if this.page_width <= 0
            # layout() has not run, so there are no columns to snap to
            log('page width is not set, cannot scroll to', xpos)
            return
        # Left edge of the column containing xpos. This is computed directly
        # instead of by stepping one column at a time, as stepping never
        # terminates for a negative xpos.
        pos = Math.floor(Math.max(0, xpos)/this.page_width) * this.page_width
        limit = document.body.scrollWidth - this.screen_width
        pos = limit if pos > limit
        window.scrollTo(Math.max(0, pos), 0)

    current_pos: (frac) ->
        # The current scroll position as a fraction between 0 and 1
        # (the frac parameter is not used)
        limit = document.body.scrollWidth - window.innerWidth
        if limit <= 0
            return 0.0
        return window.pageXOffset / limit

    current_column_location: () ->
        # The location of the left edge of the left most column currently
        # visible in the viewport. Returns 0 if layout() has not run yet.
        if this.page_width <= 0
            return 0
        # Probe a little to the right of the viewport edge, so that the
        # answer does not depend on the margin of the column
        x = window.pageXOffset + Math.max(10, this.current_margin_side)
        # Column index times column width, i.e. a position in px
        return Math.floor(x/this.page_width) * this.page_width

    next_screen_location: () ->
        # The position to scroll to for the next screen (which could contain
        # more than one pages). Returns -1 if no further scrolling is possible.
        cc = this.current_column_location()
        ans = cc + this.screen_width
        limit = document.body.scrollWidth - window.innerWidth
        if ans > limit
            ans = if window.pageXOffset < limit then limit else -1
        return ans

    previous_screen_location: () ->
        # The position to scroll to for the previous screen (which could contain
        # more than one pages). Returns -1 if no further scrolling is possible.
        cc = this.current_column_location()
        ans = cc - this.screen_width
        if ans < 0
            # We ignore small scrolls (less than 15px) when going to previous
            # screen
            ans = if window.pageXOffset > 15 then 0 else -1
        return ans

    next_col_location: () ->
        # The position to scroll to for the next column (same as
        # next_screen_location() if columns per screen == 1). Returns -1 if no
        # further scrolling is possible.
        cc = this.current_column_location()
        ans = cc + this.page_width
        limit = document.body.scrollWidth - window.innerWidth
        if ans > limit
            ans = if window.pageXOffset < limit then limit else -1
        return ans

    previous_col_location: () ->
        # The position to scroll to for the previous column (same as
        # previous_screen_location() if columns per screen == 1). Returns -1 if
        # no further scrolling is possible.
        cc = this.current_column_location()
        ans = cc - this.page_width
        if ans < 0
            ans = if window.pageXOffset > 0 then 0 else -1
        return ans

    jump_to_anchor: (name) ->
        # Jump to the element identified by anchor name. Ensures that the left
        # most column in the viewport is the column containing the start of the
        # element and that the scroll position is at the start of the column.
        elem = document.getElementById(name)
        if !elem
            # Fall back to the first element with a matching name attribute
            elems = document.getElementsByName(name)
            if elems
                elem = elems[0]
        if !elem
            return
        elem.scrollIntoView()
        if this.in_paged_mode
            # Ensure we are scrolled to the column containing elem
            this.scroll_to_xpos(absleft(elem) + 5)

    snap_to_selection: () ->
        # Ensure that the viewport is positioned at the start of the column
        # containing the start of the current selection
        if this.in_paged_mode
            sel = window.getSelection()
            # getRangeAt() throws when the selection has no ranges, so do
            # nothing if there is no usable selection
            return unless sel? and sel.rangeCount > 0 and sel.anchorNode?
            r = sel.getRangeAt(0).getBoundingClientRect()
            left = viewport_to_document(r.left, r.top,
                sel.anchorNode.ownerDocument)[0]

            # Ensure we are scrolled to the column containing the start of the
            # selection
            this.scroll_to_xpos(left+5)
# Expose a single PagedDisplay instance to the page, when there is a window
window.paged_display = new PagedDisplay() if window?
# TODO:
# css pagebreak rules
# CFI and bookmarks
# Go to reference positions
# Indexing
# Resizing of images
# Special handling for identifiable covers (colspan)?
# Full screen mode

View File

@ -31,12 +31,15 @@ def self_closing_sub(match):
return '<%s %s></%s>'%(match.group(1), match.group(2), match.group(1)) return '<%s %s></%s>'%(match.group(1), match.group(2), match.group(1))
def load_html(path, view, codec='utf-8', mime_type=None, def load_html(path, view, codec='utf-8', mime_type=None,
pre_load_callback=lambda x:None): pre_load_callback=lambda x:None, path_is_html=False):
from PyQt4.Qt import QUrl, QByteArray from PyQt4.Qt import QUrl, QByteArray
if mime_type is None: if mime_type is None:
mime_type = guess_type(path)[0] mime_type = guess_type(path)[0]
with open(path, 'rb') as f: if path_is_html:
html = f.read().decode(codec, 'replace') html = path
else:
with open(path, 'rb') as f:
html = f.read().decode(codec, 'replace')
html = EntityDeclarationProcessor(html).processed_html html = EntityDeclarationProcessor(html).processed_html
has_svg = re.search(r'<[:a-zA-Z]*svg', html) is not None has_svg = re.search(r'<[:a-zA-Z]*svg', html) is not None

View File

@ -7,7 +7,7 @@ from urllib import unquote
from PyQt4.Qt import (QVariant, QFileInfo, QObject, SIGNAL, QBuffer, Qt, from PyQt4.Qt import (QVariant, QFileInfo, QObject, SIGNAL, QBuffer, Qt,
QByteArray, QTranslator, QCoreApplication, QThread, QByteArray, QTranslator, QCoreApplication, QThread,
QEvent, QTimer, pyqtSignal, QDateTime, QDesktopServices, QEvent, QTimer, pyqtSignal, QDateTime, QDesktopServices,
QFileDialog, QFileIconProvider, QSettings, QFileDialog, QFileIconProvider, QSettings, QColor,
QIcon, QApplication, QDialog, QUrl, QFont, QPalette) QIcon, QApplication, QDialog, QUrl, QFont, QPalette)
ORG_NAME = 'KovidsBrain' ORG_NAME = 'KovidsBrain'
@ -738,11 +738,18 @@ class Application(QApplication):
def load_calibre_style(self): def load_calibre_style(self):
# On OS X QtCurve resets the palette, so we preserve it explicitly # On OS X QtCurve resets the palette, so we preserve it explicitly
orig_pal = QPalette(self.palette()) orig_pal = QPalette(self.palette())
from calibre.constants import plugins from calibre.constants import plugins
pi = plugins['progress_indicator'][0] pi = plugins['progress_indicator'][0]
path = os.path.join(sys.extensions_location, 'calibre_style.'+( path = os.path.join(sys.extensions_location, 'calibre_style.'+(
'pyd' if iswindows else 'so')) 'pyd' if iswindows else 'so'))
pi.load_style(path, 'Calibre') pi.load_style(path, 'Calibre')
# On OSX, on some machines, colors can be invalid. See https://bugs.launchpad.net/bugs/1014900
for role in (orig_pal.Button, orig_pal.Window):
c = orig_pal.brush(role).color()
if not c.isValid() or not c.toRgb().isValid():
orig_pal.setColor(role, QColor(u'lightgray'))
self.setPalette(orig_pal) self.setPalette(orig_pal)
style = self.style() style = self.style()
icon_map = {} icon_map = {}

View File

@ -11,7 +11,7 @@ import zipfile
from PyQt4.Qt import QFont, QVariant, QDialog from PyQt4.Qt import QFont, QVariant, QDialog
from calibre.constants import iswindows from calibre.constants import iswindows, isxp
from calibre.utils.config import Config, StringConfig from calibre.utils.config import Config, StringConfig
from calibre.gui2.shortcuts import ShortcutConfig from calibre.gui2.shortcuts import ShortcutConfig
from calibre.gui2.viewer.config_ui import Ui_Dialog from calibre.gui2.viewer.config_ui import Ui_Dialog
@ -113,7 +113,10 @@ class ConfigDialog(QDialog, Ui_Dialog):
p = self.tabs.widget(1) p = self.tabs.widget(1)
p.layout().addWidget(self.shortcut_config) p.layout().addWidget(self.shortcut_config)
self.opt_fit_images.setChecked(opts.fit_images) self.opt_fit_images.setChecked(opts.fit_images)
if isxp:
self.hyphenate.setVisible(False)
self.hyphenate_default_lang.setVisible(False)
self.hyphenate_label.setVisible(False)
def accept(self, *args): def accept(self, *args):
if self.shortcut_config.is_editing: if self.shortcut_config.is_editing:

View File

@ -196,7 +196,7 @@
</widget> </widget>
</item> </item>
<item row="6" column="0"> <item row="6" column="0">
<widget class="QLabel" name="label_8"> <widget class="QLabel" name="hyphenate_label">
<property name="text"> <property name="text">
<string>Default &amp;language for hyphenation:</string> <string>Default &amp;language for hyphenation:</string>
</property> </property>

View File

@ -22,7 +22,8 @@ from calibre.gui2.viewer.javascript import JavaScriptLoader
from calibre.gui2.viewer.position import PagePosition from calibre.gui2.viewer.position import PagePosition
from calibre.gui2.viewer.config import config, ConfigDialog from calibre.gui2.viewer.config import config, ConfigDialog
from calibre.ebooks.oeb.display.webview import load_html from calibre.ebooks.oeb.display.webview import load_html
from calibre.utils.config import tweaks
from calibre.constants import isxp
# }}} # }}}
def load_builtin_fonts(): def load_builtin_fonts():
@ -59,10 +60,12 @@ class Document(QWebPage): # {{{
def __init__(self, shortcuts, parent=None, debug_javascript=False): def __init__(self, shortcuts, parent=None, debug_javascript=False):
QWebPage.__init__(self, parent) QWebPage.__init__(self, parent)
self.setObjectName("py_bridge") self.setObjectName("py_bridge")
self.in_paged_mode = tweaks.get('viewer_test_paged_mode', False)
# Use this to pass arbitrary JSON encodable objects between python and # Use this to pass arbitrary JSON encodable objects between python and
# javascript. In python get/set the value as: self.bridge_value. In # javascript. In python get/set the value as: self.bridge_value. In
# javascript, get/set the value as: py_bridge.value # javascript, get/set the value as: py_bridge.value
self.bridge_value = None self.bridge_value = None
self.first_load = True
self.debug_javascript = debug_javascript self.debug_javascript = debug_javascript
self.anchor_positions = {} self.anchor_positions = {}
@ -104,6 +107,13 @@ class Document(QWebPage): # {{{
self.mainFrame().javaScriptWindowObjectCleared.connect( self.mainFrame().javaScriptWindowObjectCleared.connect(
self.add_window_objects) self.add_window_objects)
self.turn_off_internal_scrollbars()
def turn_off_internal_scrollbars(self):
    '''
    Set the scroll bar policy of the main frame to always off, for both
    the vertical and the horizontal scroll bar.
    '''
    frame = self.mainFrame()
    for orientation in (Qt.Vertical, Qt.Horizontal):
        frame.setScrollBarPolicy(orientation, Qt.ScrollBarAlwaysOff)
def set_user_stylesheet(self): def set_user_stylesheet(self):
raw = config().parse().user_css raw = config().parse().user_css
raw = '::selection {background:#ffff00; color:#000;}\nbody {background-color: white;}\n'+raw raw = '::selection {background:#ffff00; color:#000;}\nbody {background-color: white;}\n'+raw
@ -154,7 +164,8 @@ class Document(QWebPage): # {{{
@pyqtSignature("") @pyqtSignature("")
def init_hyphenate(self): def init_hyphenate(self):
if self.hyphenate and getattr(self, 'loaded_lang', ''): # Qt fails to render soft hyphens correctly on windows xp
if not isxp and self.hyphenate and getattr(self, 'loaded_lang', ''):
self.javascript('do_hyphenation("%s")'%self.loaded_lang) self.javascript('do_hyphenation("%s")'%self.loaded_lang)
def _pass_json_value_getter(self): def _pass_json_value_getter(self):
@ -175,9 +186,12 @@ class Document(QWebPage): # {{{
'document.body.style.marginLeft').toString()) 'document.body.style.marginLeft').toString())
self.initial_right_margin = unicode(self.javascript( self.initial_right_margin = unicode(self.javascript(
'document.body.style.marginRight').toString()) 'document.body.style.marginRight').toString())
if self.in_paged_mode:
self.switch_to_paged_mode()
if self.in_fullscreen_mode: if self.in_fullscreen_mode:
self.switch_to_fullscreen_mode() self.switch_to_fullscreen_mode()
self.read_anchor_positions(use_cache=False) self.read_anchor_positions(use_cache=False)
self.first_load = False
def read_anchor_positions(self, use_cache=True): def read_anchor_positions(self, use_cache=True):
self.bridge_value = tuple(self.index_anchors) self.bridge_value = tuple(self.index_anchors)
@ -190,6 +204,22 @@ class Document(QWebPage): # {{{
self.anchor_positions = {} self.anchor_positions = {}
return self.anchor_positions return self.anchor_positions
def switch_to_paged_mode(self, onresize=False):
    '''
    Run paged_display.layout() in the page and, when the laid out content
    is wider than the window, widen the preferred contents size by the
    side margin that layout() returned.

    :param onresize: Not used by this method.
    '''
    margin = self.javascript('paged_display.layout()', typ=int)
    # Setup the contents size to ensure that there is a right most margin.
    # Without this webkit renders the final column with no margin, as the
    # columns extend beyond the boundaries (and margin) of body
    size = self.mainFrame().contentsSize()
    if sz_fits(size.width(), self.window_width) if False else size.width() <= self.window_width:
        return
    size.setWidth(size.width() + margin)
    self.setPreferredContentsSize(size)
def after_resize(self):
    '''
    Redo the column layout after the view has been resized. Does nothing
    unless the document is in paged mode.
    '''
    if not self.in_paged_mode:
        return
    # Replace the preferred contents size left over from the previous
    # layout with a default constructed QSize before laying out again
    self.setPreferredContentsSize(QSize())
    self.switch_to_paged_mode(onresize=True)
def switch_to_fullscreen_mode(self): def switch_to_fullscreen_mode(self):
self.in_fullscreen_mode = True self.in_fullscreen_mode = True
self.javascript(''' self.javascript('''
@ -233,20 +263,21 @@ class Document(QWebPage): # {{{
def javascript(self, string, typ=None): def javascript(self, string, typ=None):
ans = self.mainFrame().evaluateJavaScript(string) ans = self.mainFrame().evaluateJavaScript(string)
if typ == 'int': if typ in {'int', int}:
ans = ans.toInt() ans = ans.toInt()
if ans[1]: if ans[1]:
return ans[0] return ans[0]
return 0 return 0
if typ in {'float', float}:
ans = ans.toReal()
return ans[0] if ans[1] else 0.0
if typ == 'string': if typ == 'string':
return unicode(ans.toString()) return unicode(ans.toString())
return ans return ans
def javaScriptConsoleMessage(self, msg, lineno, msgid): def javaScriptConsoleMessage(self, msg, lineno, msgid):
if self.debug_javascript: if self.debug_javascript:
prints( 'JS:', msgid, lineno)
prints(msg) prints(msg)
prints(' ')
else: else:
return QWebPage.javaScriptConsoleMessage(self, msg, lineno, msgid) return QWebPage.javaScriptConsoleMessage(self, msg, lineno, msgid)
@ -263,13 +294,7 @@ class Document(QWebPage): # {{{
self.mainFrame().setScrollPosition(QPoint(x, y)) self.mainFrame().setScrollPosition(QPoint(x, y))
def jump_to_anchor(self, anchor): def jump_to_anchor(self, anchor):
self.javascript('document.location.hash = "%s"'%anchor) self.javascript('paged_display.jump_to_anchor("%s")'%anchor)
def quantize(self):
if self.height > self.window_height:
r = self.height%self.window_height
if r > 0:
self.javascript('document.body.style.paddingBottom = "%dpx"'%r)
def element_ypos(self, elem): def element_ypos(self, elem):
ans, ok = elem.evaluateJavaScript('$(this).offset().top').toInt() ans, ok = elem.evaluateJavaScript('$(this).offset().top').toInt()
@ -314,15 +339,22 @@ class Document(QWebPage): # {{{
@dynamic_property @dynamic_property
def scroll_fraction(self): def scroll_fraction(self):
def fget(self): def fget(self):
try: if self.in_paged_mode:
return abs(float(self.ypos)/(self.height-self.window_height)) return self.javascript('paged_display.current_pos()',
except ZeroDivisionError: typ='float')
return 0. else:
try:
return abs(float(self.ypos)/(self.height-self.window_height))
except ZeroDivisionError:
return 0.
def fset(self, val): def fset(self, val):
npos = val * (self.height - self.window_height) if self.in_paged_mode:
if npos < 0: self.javascript('paged_display.scroll_to_pos(%f)'%val)
npos = 0 else:
self.scroll_to(x=self.xpos, y=npos) npos = val * (self.height - self.window_height)
if npos < 0:
npos = 0
self.scroll_to(x=self.xpos, y=npos)
return property(fget=fget, fset=fset) return property(fget=fget, fset=fset)
@property @property
@ -363,6 +395,7 @@ class DocumentView(QWebView): # {{{
DISABLED_BRUSH = QBrush(Qt.lightGray, Qt.Dense5Pattern) DISABLED_BRUSH = QBrush(Qt.lightGray, Qt.Dense5Pattern)
def initialize_view(self, debug_javascript=False): def initialize_view(self, debug_javascript=False):
self.setRenderHints(QPainter.Antialiasing|QPainter.TextAntialiasing|QPainter.SmoothPixmapTransform)
self.flipper = SlideFlip(self) self.flipper = SlideFlip(self)
self.is_auto_repeat_event = False self.is_auto_repeat_event = False
self.debug_javascript = debug_javascript self.debug_javascript = debug_javascript
@ -555,9 +588,11 @@ class DocumentView(QWebView): # {{{
return property(fget=fget, fset=fset) return property(fget=fget, fset=fset)
def search(self, text, backwards=False): def search(self, text, backwards=False):
if backwards: flags = self.document.FindBackward if backwards else self.document.FindFlags(0)
return self.findText(text, self.document.FindBackward) found = self.findText(text, flags)
return self.findText(text) if found and self.document.in_paged_mode:
self.document.javascript('paged_display.snap_to_selection()')
return found
def path(self): def path(self):
return os.path.abspath(unicode(self.url().toLocalFile())) return os.path.abspath(unicode(self.url().toLocalFile()))
@ -570,7 +605,7 @@ class DocumentView(QWebView): # {{{
if self.manager is not None: if self.manager is not None:
self.manager.load_started() self.manager.load_started()
load_html(path, self, codec=path.encoding, mime_type=getattr(path, load_html(path, self, codec=getattr(path, 'encoding', 'utf-8'), mime_type=getattr(path,
'mime_type', None), pre_load_callback=callback) 'mime_type', None), pre_load_callback=callback)
entries = set() entries = set()
for ie in getattr(path, 'index_entries', []): for ie in getattr(path, 'index_entries', []):
@ -579,10 +614,12 @@ class DocumentView(QWebView): # {{{
if ie.end_anchor: if ie.end_anchor:
entries.add(ie.end_anchor) entries.add(ie.end_anchor)
self.document.index_anchors = entries self.document.index_anchors = entries
self.turn_off_internal_scrollbars()
def initialize_scrollbar(self): def initialize_scrollbar(self):
if getattr(self, 'scrollbar', None) is not None: if getattr(self, 'scrollbar', None) is not None:
if self.document.in_paged_mode:
self.scrollbar.setVisible(False)
return
delta = self.document.width - self.size().width() delta = self.document.width - self.size().width()
if delta > 0: if delta > 0:
self._ignore_scrollbar_signals = True self._ignore_scrollbar_signals = True
@ -623,7 +660,6 @@ class DocumentView(QWebView): # {{{
self.manager.scrolled(self.document.scroll_fraction, self.manager.scrolled(self.document.scroll_fraction,
onload=True) onload=True)
self.turn_off_internal_scrollbars()
if self.flipper.isVisible(): if self.flipper.isVisible():
if self.flipper.running: if self.flipper.running:
self.flipper.setVisible(False) self.flipper.setVisible(False)
@ -631,12 +667,6 @@ class DocumentView(QWebView): # {{{
self.flipper(self.current_page_image(), self.flipper(self.current_page_image(),
duration=self.document.page_flip_duration) duration=self.document.page_flip_duration)
def turn_off_internal_scrollbars(self):
self.document.mainFrame().setScrollBarPolicy(Qt.Vertical, Qt.ScrollBarAlwaysOff)
self.document.mainFrame().setScrollBarPolicy(Qt.Horizontal, Qt.ScrollBarAlwaysOff)
@classmethod @classmethod
def test_line(cls, img, y): def test_line(cls, img, y):
'Test if line contains pixels of exactly the same color' 'Test if line contains pixels of exactly the same color'
@ -651,6 +681,7 @@ class DocumentView(QWebView): # {{{
overlap = self.height() overlap = self.height()
img = QImage(self.width(), overlap, QImage.Format_ARGB32_Premultiplied) img = QImage(self.width(), overlap, QImage.Format_ARGB32_Premultiplied)
painter = QPainter(img) painter = QPainter(img)
painter.setRenderHints(self.renderHints())
self.document.mainFrame().render(painter, QRegion(0, 0, self.width(), overlap)) self.document.mainFrame().render(painter, QRegion(0, 0, self.width(), overlap))
painter.end() painter.end()
return img return img
@ -670,6 +701,28 @@ class DocumentView(QWebView): # {{{
return return
epf = self.document.enable_page_flip and not self.is_auto_repeat_event epf = self.document.enable_page_flip and not self.is_auto_repeat_event
if self.document.in_paged_mode:
loc = self.document.javascript(
'paged_display.previous_screen_location()', typ='int')
if loc < 0:
if self.manager is not None:
if epf:
self.flipper.initialize(self.current_page_image(),
forwards=False)
self.manager.previous_document()
else:
if epf:
self.flipper.initialize(self.current_page_image(),
forwards=False)
self.document.scroll_to(x=loc, y=0)
if epf:
self.flipper(self.current_page_image(),
duration=self.document.page_flip_duration)
if self.manager is not None:
self.manager.scrolled(self.scroll_fraction)
return
delta_y = self.document.window_height - 25 delta_y = self.document.window_height - 25
if self.document.at_top: if self.document.at_top:
if self.manager is not None: if self.manager is not None:
@ -700,6 +753,26 @@ class DocumentView(QWebView): # {{{
return return
epf = self.document.enable_page_flip and not self.is_auto_repeat_event epf = self.document.enable_page_flip and not self.is_auto_repeat_event
if self.document.in_paged_mode:
loc = self.document.javascript(
'paged_display.next_screen_location()', typ='int')
if loc < 0:
if self.manager is not None:
if epf:
self.flipper.initialize(self.current_page_image())
self.manager.next_document()
else:
if epf:
self.flipper.initialize(self.current_page_image())
self.document.scroll_to(x=loc, y=0)
if epf:
self.flipper(self.current_page_image(),
duration=self.document.page_flip_duration)
if self.manager is not None:
self.manager.scrolled(self.scroll_fraction)
return
window_height = self.document.window_height window_height = self.document.window_height
document_height = self.document.height document_height = self.document.height
ddelta = document_height - window_height ddelta = document_height - window_height
@ -762,25 +835,38 @@ class DocumentView(QWebView): # {{{
#print 'After all:', self.document.ypos #print 'After all:', self.document.ypos
def scroll_by(self, x=0, y=0, notify=True): def scroll_by(self, x=0, y=0, notify=True):
old_pos = self.document.ypos old_pos = (self.document.xpos if self.document.in_paged_mode else
self.document.ypos)
self.document.scroll_by(x, y) self.document.scroll_by(x, y)
if notify and self.manager is not None and self.document.ypos != old_pos: new_pos = (self.document.xpos if self.document.in_paged_mode else
self.document.ypos)
if notify and self.manager is not None and new_pos != old_pos:
self.manager.scrolled(self.scroll_fraction) self.manager.scrolled(self.scroll_fraction)
def scroll_to(self, pos, notify=True): def scroll_to(self, pos, notify=True):
if self._ignore_scrollbar_signals: if self._ignore_scrollbar_signals:
return return
old_pos = self.document.ypos old_pos = (self.document.xpos if self.document.in_paged_mode else
if isinstance(pos, basestring): self.document.ypos)
self.document.jump_to_anchor(pos) if self.document.in_paged_mode:
else: if isinstance(pos, basestring):
if pos >= 1: self.document.jump_to_anchor(pos)
self.document.scroll_to(0, self.document.height)
else: else:
y = int(math.ceil( self.document.scroll_fraction = pos
pos*(self.document.height-self.document.window_height))) else:
self.document.scroll_to(0, y) if isinstance(pos, basestring):
if notify and self.manager is not None and self.document.ypos != old_pos: self.document.jump_to_anchor(pos)
else:
if pos >= 1:
self.document.scroll_to(0, self.document.height)
else:
y = int(math.ceil(
pos*(self.document.height-self.document.window_height)))
self.document.scroll_to(0, y)
new_pos = (self.document.xpos if self.document.in_paged_mode else
self.document.ypos)
if notify and self.manager is not None and new_pos != old_pos:
self.manager.scrolled(self.scroll_fraction) self.manager.scrolled(self.scroll_fraction)
@dynamic_property @dynamic_property
@ -813,9 +899,8 @@ class DocumentView(QWebView): # {{{
return QWebView.changeEvent(self, event) return QWebView.changeEvent(self, event)
def paintEvent(self, event): def paintEvent(self, event):
self.turn_off_internal_scrollbars()
painter = QPainter(self) painter = QPainter(self)
painter.setRenderHints(self.renderHints())
self.document.mainFrame().render(painter, event.region()) self.document.mainFrame().render(painter, event.region())
if not self.isEnabled(): if not self.isEnabled():
painter.fillRect(event.region().boundingRect(), self.DISABLED_BRUSH) painter.fillRect(event.region().boundingRect(), self.DISABLED_BRUSH)
@ -827,6 +912,18 @@ class DocumentView(QWebView): # {{{
if self.manager is not None and event.delta() != 0: if self.manager is not None and event.delta() != 0:
(self.manager.font_size_larger if event.delta() > 0 else (self.manager.font_size_larger if event.delta() > 0 else
self.manager.font_size_smaller)() self.manager.font_size_smaller)()
return
if self.document.in_paged_mode:
if abs(event.delta()) < 15: return
typ = 'screen' if self.document.wheel_flips_pages else 'col'
direction = 'next' if event.delta() < 0 else 'previous'
loc = self.document.javascript('paged_display.%s_%s_location()'%(
direction, typ), typ='int')
if loc > -1:
self.document.scroll_to(x=loc, y=0)
return
if event.delta() < -14: if event.delta() < -14:
if self.document.wheel_flips_pages: if self.document.wheel_flips_pages:
self.next_page() self.next_page()
@ -866,6 +963,17 @@ class DocumentView(QWebView): # {{{
if not self.handle_key_press(event): if not self.handle_key_press(event):
return QWebView.keyPressEvent(self, event) return QWebView.keyPressEvent(self, event)
def paged_col_scroll(self, forward=True):
    '''
    Scroll by one column in paged mode.

    Asks the page for the location of the next (or previous) column. If
    there is one, scroll to it and notify the manager of the new scroll
    fraction. Otherwise ask the manager to move to the next (or previous)
    document.

    :param forward: If True move to the next column, otherwise to the
        previous one.
    '''
    direction = 'next' if forward else 'previous'
    loc = self.document.javascript(
        'paged_display.%s_col_location()'%direction, typ='int')
    if loc > -1:
        self.document.scroll_to(x=loc, y=0)
        # A view can exist without a manager, so guard every use of it
        if self.manager is not None:
            self.manager.scrolled(self.document.scroll_fraction)
    elif self.manager is not None:
        # The location functions return -1 when no further scrolling is
        # possible in this document
        if forward:
            self.manager.next_document()
        else:
            self.manager.previous_document()
def handle_key_press(self, event): def handle_key_press(self, event):
handled = True handled = True
key = self.shortcuts.get_match(event) key = self.shortcuts.get_match(event)
@ -877,21 +985,33 @@ class DocumentView(QWebView): # {{{
finally: finally:
self.is_auto_repeat_event = False self.is_auto_repeat_event = False
elif key == 'Down': elif key == 'Down':
if (not self.document.line_scrolling_stops_on_pagebreaks and if self.document.in_paged_mode:
self.document.at_bottom): self.paged_col_scroll()
self.manager.next_document()
else: else:
self.scroll_by(y=15) if (not self.document.line_scrolling_stops_on_pagebreaks and
self.document.at_bottom):
self.manager.next_document()
else:
self.scroll_by(y=15)
elif key == 'Up': elif key == 'Up':
if (not self.document.line_scrolling_stops_on_pagebreaks and if self.document.in_paged_mode:
self.document.at_top): self.paged_col_scroll(forward=False)
self.manager.previous_document()
else: else:
self.scroll_by(y=-15) if (not self.document.line_scrolling_stops_on_pagebreaks and
self.document.at_top):
self.manager.previous_document()
else:
self.scroll_by(y=-15)
elif key == 'Left': elif key == 'Left':
self.scroll_by(x=-15) if self.document.in_paged_mode:
self.paged_col_scroll(forward=False)
else:
self.scroll_by(x=-15)
elif key == 'Right': elif key == 'Right':
self.scroll_by(x=15) if self.document.in_paged_mode:
self.paged_col_scroll()
else:
self.scroll_by(x=15)
else: else:
handled = False handled = False
return handled return handled

View File

@ -30,10 +30,11 @@ class JavaScriptLoader(object):
CS = { CS = {
'cfi':'ebooks.oeb.display.cfi', 'cfi':'ebooks.oeb.display.cfi',
'indexing':'ebooks.oeb.display.indexing', 'indexing':'ebooks.oeb.display.indexing',
'paged':'ebooks.oeb.display.paged',
} }
ORDER = ('jquery', 'jquery_scrollTo', 'bookmarks', 'referencing', 'images', ORDER = ('jquery', 'jquery_scrollTo', 'bookmarks', 'referencing', 'images',
'hyphenation', 'hyphenator', 'cfi', 'indexing',) 'hyphenation', 'hyphenator', 'cfi', 'indexing', 'paged')
def __init__(self, dynamic_coffeescript=False): def __init__(self, dynamic_coffeescript=False):

View File

@ -747,6 +747,7 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
# There hasn't been a resize event for some time # There hasn't been a resize event for some time
# restore the current page position. # restore the current page position.
self.resize_in_progress = False self.resize_in_progress = False
self.view.document.after_resize()
if self.window_mode_changed: if self.window_mode_changed:
# This resize is part of a window mode change, special case it # This resize is part of a window mode change, special case it
self.handle_window_mode_toggle() self.handle_window_mode_toggle()
@ -1003,6 +1004,12 @@ def main(args=sys.argv):
QApplication.setApplicationName(APP_UID) QApplication.setApplicationName(APP_UID)
main = EbookViewer(args[1] if len(args) > 1 else None, main = EbookViewer(args[1] if len(args) > 1 else None,
debug_javascript=opts.debug_javascript, open_at=open_at) debug_javascript=opts.debug_javascript, open_at=open_at)
# This is needed for paged mode. Without it, the first document that is
# loaded will have extra blank space at the bottom, as
# turn_off_internal_scrollbars does not take effect for the first
# rendered document
main.view.load_path(P('viewer/blank.html', allow_user_override=False))
sys.excepthook = main.unhandled_exception sys.excepthook = main.unhandled_exception
main.show() main.show()
if opts.raise_window: if opts.raise_window:

View File

@ -57,4 +57,14 @@ class DBPrefs(dict):
def set(self, key, val): def set(self, key, val):
self.__setitem__(key, val) self.__setitem__(key, val)
def get_namespaced(self, namespace, key, default=None):
    '''
    Return the value stored under ``namespaced:<namespace>:<key>``, or
    default if there is no such entry.
    '''
    full_key = u'namespaced:%s:%s' % (namespace, key)
    try:
        value = dict.__getitem__(self, full_key)
    except KeyError:
        value = default
    return value
def set_namespaced(self, namespace, key, val):
    '''
    Store val under ``namespaced:<namespace>:<key>``, using normal item
    assignment on this object.
    '''
    self[u'namespaced:%s:%s' % (namespace, key)] = val