diff --git a/INSTALL b/INSTALL
index cb8261eff6..93b119b2e1 100644
--- a/INSTALL
+++ b/INSTALL
@@ -1,6 +1,9 @@
calibre supports installation from source, only on Linux.
-On Windows and OS X use the provided installers and use
-the facilities of the calibre-debug command to hack on the calibre source.
+
+Note that you *do not* need to install from source to hack on
+the calibre source code. To get started with calibre development,
+use a normal calibre install and follow the instructions at
+http://calibre-ebook.com/user_manual/develop.html
On Linux, there are two kinds of installation from source possible.
Note that both kinds require lots of dependencies as well as a
@@ -45,3 +48,4 @@ This type of install can be run with the command::
sudo python setup.py develop
Use the -h flag for help on the develop command.
+
diff --git a/README b/README
index 2c916fc7d7..b518e977c8 100644
--- a/README
+++ b/README
@@ -7,7 +7,7 @@ reading. It is cross platform, running on Linux, Windows and OS X.
For screenshots: https://calibre-ebook.com/demo
For installation/usage instructions please see
-http://calibre-ebook.com
+http://calibre-ebook.com/user_manual
For source code access:
bzr branch lp:calibre
diff --git a/src/calibre/ebooks/metadata/google_books.py b/src/calibre/ebooks/metadata/google_books.py
index 5a5e09234e..2e52bf020d 100644
--- a/src/calibre/ebooks/metadata/google_books.py
+++ b/src/calibre/ebooks/metadata/google_books.py
@@ -193,6 +193,7 @@ class ResultList(list):
def search(title=None, author=None, publisher=None, isbn=None,
min_viewability='none', verbose=False, max_results=40):
br = browser()
+ br.set_handle_gzip(True)
start, entries = 1, []
while start > 0 and len(entries) <= max_results:
new, start = Query(title=title, author=author, publisher=publisher,
diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py
index 9334d818ec..61b555b041 100644
--- a/src/calibre/ebooks/metadata/sources/amazon.py
+++ b/src/calibre/ebooks/metadata/sources/amazon.py
@@ -23,7 +23,7 @@ from calibre.ebooks.metadata.book.base import Metadata
from calibre.library.comments import sanitize_comments_html
from calibre.utils.date import parse_date
-class Worker(Thread): # {{{
+class Worker(Thread): # Get details {{{
'''
Get book details from amazons book page in a separate thread
@@ -283,6 +283,7 @@ class Amazon(Source):
touched_fields = frozenset(['title', 'authors', 'identifier:amazon',
'identifier:isbn', 'rating', 'comments', 'publisher', 'pubdate'])
has_html_comments = True
+ supports_gzip_transfer_encoding = True
AMAZON_DOMAINS = {
'com': _('US'),
diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py
index 08012c3ee8..5903a5e710 100644
--- a/src/calibre/ebooks/metadata/sources/base.py
+++ b/src/calibre/ebooks/metadata/sources/base.py
@@ -21,6 +21,7 @@ msprefs = JSONConfig('metadata_sources.json')
msprefs.defaults['txt_comments'] = False
msprefs.defaults['ignore_fields'] = []
msprefs.defaults['max_tags'] = 10
+msprefs.defaults['wait_after_first_identify_result'] = 30 # seconds
def create_log(ostream=None):
log = ThreadSafeLog(level=ThreadSafeLog.DEBUG)
@@ -92,6 +93,15 @@ class InternalMetadataCompareKeyGen(object):
# }}}
+def get_cached_cover_urls(mi):
+    'Yield (plugin, url) for each identify plugin with a cached cover URL for mi'
+    from calibre.customize.ui import metadata_plugins
+    for plugin in list(metadata_plugins['identify']):
+        cover_url = plugin.get_cached_cover_url(mi.identifiers)
+        if cover_url:
+            yield (plugin, cover_url)
+
+
class Source(Plugin):
type = _('Metadata source')
@@ -110,6 +120,12 @@ class Source(Plugin):
#: Set this to True if your plugin return HTML formatted comments
has_html_comments = False
+ #: Setting this to True means that the browser object will add
+    #: Accept-Encoding: gzip to all requests. This can speed up downloads
+ #: but make sure that the source actually supports gzip transfer encoding
+ #: correctly first
+ supports_gzip_transfer_encoding = False
+
def __init__(self, *args, **kwargs):
Plugin.__init__(self, *args, **kwargs)
self._isbn_to_identifier_cache = {}
@@ -133,6 +149,8 @@ class Source(Plugin):
def browser(self):
if self._browser is None:
self._browser = browser(user_agent=random_user_agent())
+ if self.supports_gzip_transfer_encoding:
+ self._browser.set_handle_gzip(True)
return self._browser.clone_browser()
# }}}
diff --git a/src/calibre/ebooks/metadata/sources/google.py b/src/calibre/ebooks/metadata/sources/google.py
index 989320f710..21c99fdf46 100644
--- a/src/calibre/ebooks/metadata/sources/google.py
+++ b/src/calibre/ebooks/metadata/sources/google.py
@@ -160,6 +160,7 @@ class GoogleBooks(Source):
touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate',
'comments', 'publisher', 'identifier:isbn', 'rating',
'identifier:google']) # language currently disabled
+ supports_gzip_transfer_encoding = True
GOOGLE_COVER = 'http://books.google.com/books?id=%s&printsec=frontcover&img=1'
diff --git a/src/calibre/ebooks/metadata/sources/identify.py b/src/calibre/ebooks/metadata/sources/identify.py
index ab86e8ffa2..71554595ad 100644
--- a/src/calibre/ebooks/metadata/sources/identify.py
+++ b/src/calibre/ebooks/metadata/sources/identify.py
@@ -21,9 +21,7 @@ from calibre.ebooks.metadata.book.base import Metadata
from calibre.utils.date import utc_tz
from calibre.utils.html2text import html2text
-# How long to wait for more results after first result is found
-WAIT_AFTER_FIRST_RESULT = 30 # seconds
-
+# Download worker {{{
class Worker(Thread):
def __init__(self, plugin, kwargs, abort):
@@ -47,99 +45,9 @@ def is_worker_alive(workers):
return True
return False
-def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
- start_time = time.time()
- plugins = list(metadata_plugins['identify'])
-
- kwargs = {
- 'title': title,
- 'authors': authors,
- 'identifiers': identifiers,
- 'timeout': timeout,
- }
-
- log('Running identify query with parameters:')
- log(kwargs)
- log('Using plugins:', ', '.join([p.name for p in plugins]))
- log('The log (if any) from individual plugins is below')
-
- workers = [Worker(p, kwargs, abort) for p in plugins]
- for w in workers:
- w.start()
-
- first_result_at = None
- results = dict.fromkeys(plugins, [])
-
- def get_results():
- found = False
- for w in workers:
- try:
- result = w.rq.get_nowait()
- except Empty:
- pass
- else:
- results[w.plugin].append(result)
- found = True
- return found
-
- while True:
- time.sleep(0.2)
-
- if get_results() and first_result_at is None:
- first_result_at = time.time()
-
- if not is_worker_alive(workers):
- break
-
- if (first_result_at is not None and time.time() - first_result_at <
- WAIT_AFTER_FIRST_RESULT):
- log('Not waiting any longer for more results')
- abort.set()
- break
-
- get_results()
- sort_kwargs = dict(kwargs)
- for k in list(sort_kwargs.iterkeys()):
- if k not in ('title', 'authors', 'identifiers'):
- sort_kwargs.pop(k)
-
- for plugin, results in results.iteritems():
- results.sort(key=plugin.identify_results_keygen(**sort_kwargs))
- plog = plugin.buf.getvalue().strip()
- if plog:
- log('\n'+'*'*35, plugin.name, '*'*35)
- log('Found %d results'%len(results))
- log(plog)
- log('\n'+'*'*80)
-
- for i, result in enumerate(results):
- result.relevance_in_source = i
- result.has_cached_cover_url = \
- plugin.get_cached_cover_url(result.identifiers) is not None
- result.identify_plugin = plugin
-
- log('The identify phase took %.2f seconds'%(time.time() - start_time))
- log('Merging results from different sources and finding earliest',
- 'publication dates')
- start_time = time.time()
- results = merge_identify_results(results, log)
- log('We have %d merged results, merging took: %.2f seconds' %
- (len(results), time.time() - start_time))
-
- if msprefs['txt_comments']:
- for r in results:
- if r.plugin.has_html_comments and r.comments:
- r.comments = html2text(r.comments)
-
- dummy = Metadata(_('Unknown'))
- max_tags = msprefs['max_tags']
- for f in msprefs['ignore_fields']:
- for r in results:
- setattr(r, f, getattr(dummy, f))
- r.tags = r.tags[:max_tags]
-
- return results
+# }}}
+# Merge results from different sources {{{
class ISBNMerge(object):
@@ -298,6 +206,147 @@ def merge_identify_results(result_map, log):
return isbn_merge.finalize()
+# }}}
+def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
+    '''
+    Run every identify plugin in its own worker thread and return the merged
+    results. Once a first result arrives, the remaining workers are given
+    msprefs['wait_after_first_identify_result'] seconds before being aborted.
+    '''
+    start_time = time.time()
+    plugins = list(metadata_plugins['identify'])
+    kwargs = {'title': title, 'authors': authors,
+            'identifiers': identifiers, 'timeout': timeout}
+
+    log('Running identify query with parameters:')
+    log(kwargs)
+    log('Using plugins:', ', '.join([p.name for p in plugins]))
+    log('The log (if any) from individual plugins is below')
+
+    workers = [Worker(p, kwargs, abort) for p in plugins]
+    for w in workers:
+        w.start()
+
+    first_result_at = None
+    # One list per plugin: dict.fromkeys(plugins, []) shares a single list
+    results = dict((p, []) for p in plugins)
+
+    def get_results():
+        found = False
+        for w in workers:
+            try:
+                result = w.rq.get_nowait()
+            except Empty:
+                pass
+            else:
+                results[w.plugin].append(result)
+                found = True
+        return found
+
+    wait_time = msprefs['wait_after_first_identify_result']
+    while True:
+        time.sleep(0.2)
+        if get_results() and first_result_at is None:
+            first_result_at = time.time()
+        if not is_worker_alive(workers):
+            break
+        if (first_result_at is not None and
+                time.time() - first_result_at > wait_time):
+            log('Not waiting any longer for more results')
+            abort.set()
+            break
+
+    # get_results() takes at most one result per worker, so drain the queues
+    while get_results():
+        pass
+    sort_kwargs = dict((k, v) for k, v in kwargs.iteritems()
+            if k in ('title', 'authors', 'identifiers'))
+
+    # Do not rebind results here, the dict is needed for the merge below
+    for plugin, presults in results.iteritems():
+        presults.sort(key=plugin.identify_results_keygen(**sort_kwargs))
+        plog = plugin.buf.getvalue().strip()
+        if plog:
+            log('\n'+'*'*35, plugin.name, '*'*35)
+            log('Found %d results'%len(presults))
+            log(plog)
+            log('\n'+'*'*80)
+        for i, result in enumerate(presults):
+            result.relevance_in_source = i
+            result.has_cached_cover_url = \
+                plugin.get_cached_cover_url(result.identifiers) is not None
+            result.identify_plugin = plugin
+
+    log('The identify phase took %.2f seconds'%(time.time() - start_time))
+    log('Merging results from different sources and finding earliest',
+            'publication dates')
+    start_time = time.time()
+    results = merge_identify_results(results, log)
+    log('We have %d merged results, merging took: %.2f seconds' %
+            (len(results), time.time() - start_time))
+
+    if msprefs['txt_comments']:
+        for r in results:
+            if r.plugin.has_html_comments and r.comments:
+                r.comments = html2text(r.comments)
+
+    dummy = Metadata(_('Unknown'))
+    max_tags = msprefs['max_tags']
+    # Trim tags on every result, even when there are no fields to ignore
+    for r in results:
+        for f in msprefs['ignore_fields']:
+            setattr(r, f, getattr(dummy, f))
+        r.tags = r.tags[:max_tags]
+
+    return results
+
+if __name__ == '__main__': # tests {{{
+    # To run these tests use: calibre-debug -e
+ # src/calibre/ebooks/metadata/sources/identify.py
+ from calibre.ebooks.metadata.sources.test import (test_identify,
+ title_test, authors_test)
+ test_identify(
+ [
+
+ ( # An e-book ISBN not on Amazon, one of the authors is
+ # unknown to Amazon
+ {'identifiers':{'isbn': '9780307459671'},
+ 'title':'Invisible Gorilla', 'authors':['Christopher Chabris']},
+ [title_test('The Invisible Gorilla: And Other Ways Our Intuitions Deceive Us',
+ exact=True), authors_test(['Christopher Chabris', 'Daniel Simons'])]
+
+ ),
+
+ ( # This isbn not on amazon
+ {'identifiers':{'isbn': '8324616489'}, 'title':'Learning Python',
+ 'authors':['Lutz']},
+ [title_test('Learning Python, 3rd Edition',
+ exact=True), authors_test(['Mark Lutz'])
+ ]
+
+ ),
+
+ ( # Sophisticated comment formatting
+ {'identifiers':{'isbn': '9781416580829'}},
+ [title_test('Angels & Demons - Movie Tie-In: A Novel',
+ exact=True), authors_test(['Dan Brown'])]
+ ),
+
+ ( # No specific problems
+ {'identifiers':{'isbn': '0743273567'}},
+ [title_test('The great gatsby', exact=True),
+ authors_test(['F. Scott Fitzgerald'])]
+ ),
+
+ ( # A newer book
+ {'identifiers':{'isbn': '9780316044981'}},
+ [title_test('The Heroes', exact=True),
+ authors_test(['Joe Abercrombie'])]
+
+ ),
+
+ ])
+# }}}
diff --git a/src/calibre/ebooks/metadata/sources/test.py b/src/calibre/ebooks/metadata/sources/test.py
index de95a9b887..a7dcc2fa14 100644
--- a/src/calibre/ebooks/metadata/sources/test.py
+++ b/src/calibre/ebooks/metadata/sources/test.py
@@ -14,7 +14,8 @@ from threading import Event
from calibre.customize.ui import metadata_plugins
from calibre import prints, sanitize_file_name2
from calibre.ebooks.metadata import check_isbn
-from calibre.ebooks.metadata.sources.base import create_log
+from calibre.ebooks.metadata.sources.base import (create_log,
+ get_cached_cover_urls)
def isbn_test(isbn):
isbn_ = check_isbn(isbn)
@@ -45,8 +46,75 @@ def authors_test(authors):
return test
+def init_test(tdir_name):
+    'Return (tdir, lf, log, abort): temp dir, log file path, log and abort Event'
+    tdir = tempfile.gettempdir()
+    log_name = tdir_name.replace(' ', '') + '_identify_test.txt'
+    lf = os.path.join(tdir, log_name)
+    return tdir, lf, create_log(open(lf, 'wb')), Event()
-def test_identify_plugin(name, tests):
+def test_identify(tests): # {{{
+    '''
+    Run the full identify() pipeline against a list of test cases.
+
+    :param tests: List of (args, test_funcs) 2-tuples. args is a dict of
+                  keyword arguments for identify(); test_funcs are callables
+                  that take a Metadata object and return True iff it passes.
+
+    Exits with status 1 if no result passes every test function, or if the
+    most relevant result is not the first one that passes.
+    '''
+    from calibre.ebooks.metadata.sources.identify import identify
+
+    tdir, lf, log, abort = init_test('Full Identify')
+
+    times = []
+
+    for kwargs, test_funcs in tests:
+        prints('Running test with:', kwargs)
+        started = time.time()
+        results = identify(log, abort, **kwargs)
+        times.append(time.time() - started)
+        # A query with no results ends the whole run
+        if not results:
+            prints('identify failed to find any results')
+            break
+
+        prints('Found', len(results), 'matches:', end=' ')
+        prints('Smaller relevance means better match')
+
+        for relevance, mi in enumerate(results):
+            prints('*'*30, 'Relevance:', relevance, '*'*30)
+            prints(mi)
+            cover_sources = [plugin.name for plugin, url in
+                    get_cached_cover_urls(mi)]
+            prints('\nCached cover URLs :', cover_sources)
+            prints('*'*75, '\n\n')
+
+        # all() stops at the first failing test function for each result
+        possibles = [mi for mi in results
+                if all(tfunc(mi) for tfunc in test_funcs)]
+
+        if not possibles:
+            prints('ERROR: No results that passed all tests were found')
+            prints('Log saved to', lf)
+            raise SystemExit(1)
+
+        # The best ranked result must itself pass the tests
+        if results[0] is not possibles[0]:
+            prints('Most relevant result failed the tests')
+            raise SystemExit(1)
+
+    # NOTE: raises ZeroDivisionError when tests is empty
+    prints('Average time per query', sum(times)/len(times))
+
+    # Treat a log file larger than 10 bytes as containing errors/warnings
+    if os.stat(lf).st_size > 10:
+        prints('There were some errors/warnings, see log', lf)
+
+# }}}
+
+def test_identify_plugin(name, tests): # {{{
'''
:param name: Plugin name
:param tests: List of 2-tuples. Each two tuple is of the form (args,
@@ -62,10 +130,7 @@ def test_identify_plugin(name, tests):
break
prints('Testing the identify function of', plugin.name)
- tdir = tempfile.gettempdir()
- lf = os.path.join(tdir, plugin.name.replace(' ', '')+'_identify_test.txt')
- log = create_log(open(lf, 'wb'))
- abort = Event()
+ tdir, lf, log, abort = init_test(plugin.name)
prints('Log saved to', lf)
times = []
@@ -159,4 +224,5 @@ def test_identify_plugin(name, tests):
if os.stat(lf).st_size > 10:
prints('There were some errors/warnings, see log', lf)
+# }}}
diff --git a/src/calibre/ebooks/oeb/stylizer.py b/src/calibre/ebooks/oeb/stylizer.py
index 0cd17387fe..42974be355 100644
--- a/src/calibre/ebooks/oeb/stylizer.py
+++ b/src/calibre/ebooks/oeb/stylizer.py
@@ -17,6 +17,8 @@ from cssutils.css import CSSStyleRule, CSSPageRule, CSSStyleDeclaration, \
from cssutils import profile as cssprofiles
from lxml import etree
from lxml.cssselect import css_to_xpath, ExpressionError, SelectorSyntaxError
+
+from calibre import force_unicode
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, CSS_MIME, OEB_STYLES
from calibre.ebooks.oeb.base import XPNSMAP, xpath, urlnormalize
from calibre.ebooks.oeb.profile import PROFILES
@@ -140,13 +142,22 @@ class Stylizer(object):
log=logging.getLogger('calibre.css'))
self.font_face_rules = []
for elem in head:
- if elem.tag == XHTML('style') and elem.text \
- and elem.get('type', CSS_MIME) in OEB_STYLES:
- text = XHTML_CSS_NAMESPACE + elem.text
- text = oeb.css_preprocessor(text)
- stylesheet = parser.parseString(text, href=cssname)
- stylesheet.namespaces['h'] = XHTML_NS
- stylesheets.append(stylesheet)
+            if (elem.tag == XHTML('style') and
+                elem.get('type', CSS_MIME) in OEB_STYLES):
+                # Gather CSS from the tag and the text/tail of its child nodes
+                text = elem.text if elem.text else u''
+                for x in elem:
+                    for attr in ('text', 'tail'):
+                        t = getattr(x, attr, None)
+                        if t:
+                            text += u'\n\n' + force_unicode(t, u'utf-8')
+                if text:
+                    # Use the collected text; elem.text may be None or partial
+                    text = XHTML_CSS_NAMESPACE + text
+                    text = oeb.css_preprocessor(text)
+                    stylesheet = parser.parseString(text, href=cssname)
+                    stylesheet.namespaces['h'] = XHTML_NS
+                    stylesheets.append(stylesheet)
elif elem.tag == XHTML('link') and elem.get('href') \
and elem.get('rel', 'stylesheet').lower() == 'stylesheet' \
and elem.get('type', CSS_MIME).lower() in OEB_STYLES:
diff --git a/src/calibre/ebooks/pdf/fonts.cpp b/src/calibre/ebooks/pdf/fonts.cpp
index 99ab7517c1..c3a709869e 100644
--- a/src/calibre/ebooks/pdf/fonts.cpp
+++ b/src/calibre/ebooks/pdf/fonts.cpp
@@ -72,6 +72,7 @@ XMLFont::XMLFont(string* font_name, double size, GfxRGB rgb) :
size(size-1), line_size(-1.0), italic(false), bold(false), font_name(font_name),
font_family(NULL), color(rgb) {
+
if (!this->font_name) this->font_name = new string(DEFAULT_FONT_FAMILY);
this->font_family = family_name(this->font_name);
if (strcasestr(font_name->c_str(), "bold")) this->bold = true;
@@ -134,7 +135,12 @@ Fonts::size_type Fonts::add_font(XMLFont *f) {
}
Fonts::size_type Fonts::add_font(string* font_name, double size, GfxRGB rgb) {
- XMLFont *f = new XMLFont(font_name, size, rgb);
+    XMLFont *f = NULL;
+    if (font_name == NULL)
+        font_name = new string("Unknown");
+    // Do not delete font_name here: the XMLFont constructor keeps the pointer
+    f = new XMLFont(font_name, size, rgb);
+
return this->add_font(f);
}
diff --git a/src/calibre/gui2/dialogs/book_info.ui b/src/calibre/gui2/dialogs/book_info.ui
index 412126a610..9e9e71eda0 100644
--- a/src/calibre/gui2/dialogs/book_info.ui
+++ b/src/calibre/gui2/dialogs/book_info.ui
@@ -7,15 +7,25 @@
0
0
917
- 480
+ 492
Dialog
+
+
+ :/images/metadata.png:/images/metadata.png
+
-
+
+
+ 75
+ true
+
+
TextLabel
@@ -24,86 +34,104 @@
- -
+
-
-
-
-
-
-
-
- TextLabel
-
-
- Qt::AlignLeading|Qt::AlignLeft|Qt::AlignTop
-
-
- true
-
-
-
- -
-
-
- Comments
-
-
-
-
-
-
-
- 0
- 0
-
-
-
-
- 350
- 16777215
-
-
-
-
- about:blank
-
-
-
-
-
-
-
- -
-
-
- Fit &cover within view
-
-
-
- -
-
+
+
+ QFrame::NoFrame
+
+
+ true
+
+
+
+
+ 0
+ 0
+ 435
+ 670
+
+
+
-
-
+
- &Previous
+ TextLabel
-
-
- :/images/previous.png:/images/previous.png
+
+ Qt::AlignLeading|Qt::AlignLeft|Qt::AlignTop
+
+
+ true
-
-
-
- &Next
-
-
-
- :/images/next.png:/images/next.png
+
+
+ Comments
+
+
-
+
+
+
+ 0
+ 0
+
+
+
+
+ 350
+ 16777215
+
+
+
+
+ about:blank
+
+
+
+
+
+
+
+
+ -
+
+
+ Fit &cover within view
+
+
+
+ -
+
+
-
+
+
+ &Previous
+
+
+
+ :/images/previous.png:/images/previous.png
+
+
+
+ -
+
+
+ &Next
+
+
+
+ :/images/next.png:/images/next.png
+
+
diff --git a/src/calibre/utils/browser.py b/src/calibre/utils/browser.py
index 2f77ede6b3..6f8703ab49 100644
--- a/src/calibre/utils/browser.py
+++ b/src/calibre/utils/browser.py
@@ -38,10 +38,10 @@ class Browser(B):
self._clone_actions['set_handle_equiv'] = ('set_handle_equiv',
args, kwargs)
- def set_handle_gzip(self, *args, **kwargs):
- B.set_handle_gzip(self, *args, **kwargs)
+ def set_handle_gzip(self, handle):
+ B._set_handler(self, '_gzip', handle)
self._clone_actions['set_handle_gzip'] = ('set_handle_gzip',
- args, kwargs)
+ (handle,), {})
def set_debug_redirect(self, *args, **kwargs):
B.set_debug_redirect(self, *args, **kwargs)