From 6be7471d2e7d93793de6e25e7e9222cb82b49cc4 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 4 Apr 2011 08:02:28 -0600 Subject: [PATCH 01/57] F-Secure by louhike and more work on the new metadata download system --- recipes/f_secure.recipe | 22 +++ src/calibre/ebooks/metadata/sources/amazon.py | 1 + src/calibre/ebooks/metadata/sources/base.py | 6 + .../ebooks/metadata/sources/identify.py | 172 ++++++++++++++++-- 4 files changed, 186 insertions(+), 15 deletions(-) create mode 100644 recipes/f_secure.recipe diff --git a/recipes/f_secure.recipe b/recipes/f_secure.recipe new file mode 100644 index 0000000000..f276a4961a --- /dev/null +++ b/recipes/f_secure.recipe @@ -0,0 +1,22 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1301860159(BasicNewsRecipe): + title = u'F-Secure Weblog' + language = 'en' + __author__ = 'louhike' + description = u'All the news from the weblog of F-Secure' + publisher = u'F-Secure' + timefmt = ' [%a, %d %b, %Y]' + encoding = 'ISO-8859-1' + oldest_article = 7 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + language = 'en_EN' + remove_javascript = True + keep_only_tags = [dict(name='div', attrs={'class':'modSectionTd2'})] + remove_tags = [dict(name='a'),dict(name='hr')] + + feeds = [(u'Weblog', u'http://www.f-secure.com/weblog/weblog.rss')] + def get_cover_url(self): + return 'http://www.f-secure.com/weblog/archives/images/company_logo.png' diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py index cfa2b09ea8..9334d818ec 100644 --- a/src/calibre/ebooks/metadata/sources/amazon.py +++ b/src/calibre/ebooks/metadata/sources/amazon.py @@ -282,6 +282,7 @@ class Amazon(Source): capabilities = frozenset(['identify', 'cover']) touched_fields = frozenset(['title', 'authors', 'identifier:amazon', 'identifier:isbn', 'rating', 'comments', 'publisher', 'pubdate']) + has_html_comments = True AMAZON_DOMAINS = { 'com': _('US'), diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py index 7cc4ed3518..08012c3ee8 100644 --- a/src/calibre/ebooks/metadata/sources/base.py +++ b/src/calibre/ebooks/metadata/sources/base.py @@ -18,6 +18,9 @@ from calibre.utils.titlecase import titlecase from calibre.ebooks.metadata import check_isbn msprefs = JSONConfig('metadata_sources.json') +msprefs.defaults['txt_comments'] = False +msprefs.defaults['ignore_fields'] = [] +msprefs.defaults['max_tags'] = 10 def create_log(ostream=None): log = ThreadSafeLog(level=ThreadSafeLog.DEBUG) @@ -104,6 +107,9 @@ class Source(Plugin): #: during the identify phase touched_fields = frozenset() + #: Set this to True if your plugin return HTML formatted comments + has_html_comments = False + def __init__(self, *args, **kwargs): Plugin.__init__(self, *args, **kwargs) self._isbn_to_identifier_cache = {} diff --git a/src/calibre/ebooks/metadata/sources/identify.py b/src/calibre/ebooks/metadata/sources/identify.py index 1d4d8840e8..ab86e8ffa2 100644 --- a/src/calibre/ebooks/metadata/sources/identify.py +++ b/src/calibre/ebooks/metadata/sources/identify.py @@ -8,13 +8,18 @@ __copyright__ = '2011, Kovid Goyal ' __docformat__ = 'restructuredtext en' import time +from datetime import datetime from Queue import Queue, Empty from threading import Thread from io import BytesIO +from operator import attrgetter from calibre.customize.ui import metadata_plugins -from calibre.ebooks.metadata.sources.base import create_log +from calibre.ebooks.metadata.sources.base import create_log, msprefs from calibre.ebooks.metadata.xisbn import xisbn +from calibre.ebooks.metadata.book.base import Metadata +from calibre.utils.date import utc_tz +from calibre.utils.html2text import html2text # How long to wait for more results after first result is found WAIT_AFTER_FIRST_RESULT = 30 # seconds @@ -117,14 +122,30 @@ def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30): log('Merging results from different sources and finding earliest', 'publication dates') start_time = time.time() - merged_results = merge_identify_results(results, log) + results = merge_identify_results(results, log) log('We have %d merged results, merging took: %.2f seconds' % - (len(merged_results), time.time() - start_time)) + (len(results), time.time() - start_time)) + + if msprefs['txt_comments']: + for r in results: + if r.plugin.has_html_comments and r.comments: + r.comments = html2text(r.comments) + + dummy = Metadata(_('Unknown')) + max_tags = msprefs['max_tags'] + for f in msprefs['ignore_fields']: + for r in results: + setattr(r, f, getattr(dummy, f)) + r.tags = r.tags[:max_tags] + + return results + class ISBNMerge(object): def __init__(self): self.pools = {} + self.isbnless_results = [] def isbn_in_pool(self, isbn): if isbn: @@ -140,22 +161,143 @@ class ISBNMerge(object): return True return False - def add_result(self, result, isbn): - pool = self.isbn_in_pool(isbn) - if pool is None: - isbns, min_year = xisbn.get_isbn_pool(isbn) - if not isbns: - isbns = frozenset([isbn]) - self.pool[isbns] = pool = (min_year, []) + def add_result(self, result): + isbn = result.isbn + if isbn: + pool = self.isbn_in_pool(isbn) + if pool is None: + isbns, min_year = xisbn.get_isbn_pool(isbn) + if not isbns: + isbns = frozenset([isbn]) + self.pool[isbns] = pool = (min_year, []) + + if not self.pool_has_result_from_same_source(pool, result): + pool[1].append(result) + else: + self.isbnless_results.append(result) + + def finalize(self): + has_isbn_result = False + for results in self.pools.itervalues(): + if results: + has_isbn_result = True + break + self.has_isbn_result = has_isbn_result + + if has_isbn_result: + self.merge_isbn_results() + else: + self.results = sorted(self.isbnless_results, + key=attrgetter('relevance_in_source')) + + return self.results + + def merge_isbn_results(self): + self.results = [] + for min_year, results in self.pool.itervalues(): + if results: + self.results.append(self.merge(results, min_year)) + + self.results.sort(key=attrgetter('average_source_relevance')) + + def length_merge(self, attr, results, null_value=None, shortest=True): + values = [getattr(x, attr) for x in results if not x.is_null(attr)] + values = [x for x in values if len(x) > 0] + if not values: + return null_value + values.sort(key=len, reverse=not shortest) + return values[0] + + def random_merge(self, attr, results, null_value=None): + values = [getattr(x, attr) for x in results if not x.is_null(attr)] + return values[0] if values else null_value + + def merge(self, results, min_year): + ans = Metadata(_('Unknown')) + + # We assume the shortest title has the least cruft in it + ans.title = self.length_merge('title', results, null_value=ans.title) + + # No harm in having extra authors, maybe something useful like an + # editor or translator + ans.authors = self.length_merge('authors', results, + null_value=ans.authors, shortest=False) + + # We assume the shortest publisher has the least cruft in it + ans.publisher = self.length_merge('publisher', results, + null_value=ans.publisher) + + # We assume the smallest set of tags has the least cruft in it + ans.tags = self.length_merge('tags', results, + null_value=ans.tags) + + # We assume the longest series has the most info in it + ans.series = self.length_merge('series', results, + null_value=ans.series, shortest=False) + for r in results: + if r.series and r.series == ans.series: + ans.series_index = r.series_index + break + + # Average the rating over all sources + ratings = [] + for r in results: + rating = r.rating + if rating and rating > 0 and rating <= 5: + ratings.append(rating) + if ratings: + ans.rating = sum(ratings)/len(ratings) + + # Smallest language is likely to be valid + ans.language = self.length_merge('language', results, + null_value=ans.language) + + # Choose longest comments + ans.comments = self.length_merge('comments', results, + null_value=ans.comments, shortest=False) + + # Published date + if min_year: + min_date = datetime(min_year, 1, 2, tzinfo=utc_tz) + ans.pubdate = min_date + else: + min_date = datetime(10000, 1, 1, tzinfo=utc_tz) + for r in results: + if r.pubdate is not None and r.pubdate < min_date: + min_date = r.pubdate + if min_date.year < 10000: + ans.pubdate = min_date + + # Identifiers + for r in results: + ans.identifiers.update(r.identifiers) + + # Merge any other fields with no special handling (random merge) + touched_fields = set() + for r in results: + touched_fields |= r.plugin.touched_fields + + for f in touched_fields: + if f.startswith('identifier:') or not ans.is_null(f): + continue + setattr(ans, f, self.random_merge(f, results, + null_value=getattr(ans, f))) + + avg = [x.relevance_in_source for x in results] + avg = sum(avg)/len(avg) + ans.average_source_relevance = avg + + return ans - if not self.pool_has_result_from_same_source(pool, result): - pool[1].append(result) def merge_identify_results(result_map, log): + isbn_merge = ISBNMerge() for plugin, results in result_map.iteritems(): for result in results: - isbn = result.isbn - if isbn: - isbns, min_year = xisbn.get_isbn_pool(isbn) + isbn_merge.add_result(result) + + return isbn_merge.finalize() + + From 7599a89c472d92cd29afdbf33f7c6faa7526211c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 4 Apr 2011 09:32:32 -0600 Subject: [PATCH 02/57] Fix #750336 (Pocketbook 602/902 2.0.6 FW won't connect) --- src/calibre/devices/eb600/driver.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/calibre/devices/eb600/driver.py b/src/calibre/devices/eb600/driver.py index 5374c6c4e2..01277980db 100644 --- a/src/calibre/devices/eb600/driver.py +++ b/src/calibre/devices/eb600/driver.py @@ -244,7 +244,8 @@ class POCKETBOOK602(USBMS): BCD = [0x0324] VENDOR_NAME = '' - WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['PB602', 'PB603', 'PB902', 'PB903'] + WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['PB602', 'PB603', 'PB902', + 'PB903', 'PB'] class POCKETBOOK701(USBMS): From 4b7bc8ce365d99a87ce03cd614b3e8e3f5fceb62 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 4 Apr 2011 10:04:51 -0600 Subject: [PATCH 03/57] Fix #750288 (TimesofIndia news fetch not working) --- recipes/toi.recipe | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/recipes/toi.recipe b/recipes/toi.recipe index 643d120a36..8a772b6f9d 100644 --- a/recipes/toi.recipe +++ b/recipes/toi.recipe @@ -1,3 +1,4 @@ +import re from calibre.web.feeds.news import BasicNewsRecipe class TimesOfIndia(BasicNewsRecipe): @@ -8,10 +9,10 @@ class TimesOfIndia(BasicNewsRecipe): max_articles_per_feed = 25 no_stylesheets = True - keep_only_tags = [dict(attrs={'class':'maintable12'})] + keep_only_tags = [{'class':['maintable12', 'prttabl']}] remove_tags = [ dict(style=lambda x: x and 'float' in x), - dict(attrs={'class':'prvnxtbg'}), + {'class':['prvnxtbg', 'footbdrin', 'bcclftr']}, ] feeds = [ @@ -38,8 +39,28 @@ class TimesOfIndia(BasicNewsRecipe): ('Most Read', 'http://timesofindia.indiatimes.com/rssfeedmostread.cms') ] - def print_version(self, url): - return url + '?prtpage=1' + + def get_article_url(self, article): + url = BasicNewsRecipe.get_article_url(self, article) + if '/0Ltimesofindia' in url: + url = url.partition('/0L')[-1] + url = url.replace('0B', '.').replace('0N', '.com').replace('0C', + '/').replace('0E', '-') + url = 'http://' + url.rpartition('/')[0] + match = re.search(r'/([0-9a-zA-Z]+?)\.cms', url) + if match is not None: + num = match.group(1) + num = re.sub(r'[^0-9]', '', num) + return ('http://timesofindia.indiatimes.com/articleshow/%s.cms?prtpage=1' % + num) + else: + cms = re.search(r'/(\d+)\.cms', url) + if cms is not None: + return ('http://timesofindia.indiatimes.com/articleshow/%s.cms?prtpage=1' % + cms.group(1)) + + return url + def preprocess_html(self, soup): return soup From 7d1c706835bbc17990596804e232233272fc5796 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 4 Apr 2011 10:41:43 -0600 Subject: [PATCH 04/57] Fix #750101 (Private bug) --- src/calibre/ebooks/pdf/fonts.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/pdf/fonts.cpp b/src/calibre/ebooks/pdf/fonts.cpp index 99ab7517c1..9b9e7708a3 100644 --- a/src/calibre/ebooks/pdf/fonts.cpp +++ b/src/calibre/ebooks/pdf/fonts.cpp @@ -72,6 +72,7 @@ XMLFont::XMLFont(string* font_name, double size, GfxRGB rgb) : size(size-1), line_size(-1.0), italic(false), bold(false), font_name(font_name), font_family(NULL), color(rgb) { + if (!this->font_name) this->font_name = new string(DEFAULT_FONT_FAMILY); this->font_family = family_name(this->font_name); if (strcasestr(font_name->c_str(), "bold")) this->bold = true; @@ -134,7 +135,15 @@ Fonts::size_type Fonts::add_font(XMLFont *f) { } Fonts::size_type Fonts::add_font(string* font_name, double size, GfxRGB rgb) { - XMLFont *f = new XMLFont(font_name, size, rgb); + XMLFont *f = NULL; + if (font_name == NULL) { + string *fn = new string("Unknown"); + f = new XMLFont(fn, size, rgb); + // fn must not be deleted + } else { + f = new XMLFont(font_name, size, rgb); + } + return this->add_font(f); } From 83175da4b297af6c46954ded3b4cd4f476302104 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 4 Apr 2011 10:59:57 -0600 Subject: [PATCH 05/57] ... --- src/calibre/ebooks/pdf/fonts.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/calibre/ebooks/pdf/fonts.cpp b/src/calibre/ebooks/pdf/fonts.cpp index 9b9e7708a3..c5261298ff 100644 --- a/src/calibre/ebooks/pdf/fonts.cpp +++ b/src/calibre/ebooks/pdf/fonts.cpp @@ -136,13 +136,9 @@ Fonts::size_type Fonts::add_font(XMLFont *f) { Fonts::size_type Fonts::add_font(string* font_name, double size, GfxRGB rgb) { XMLFont *f = NULL; - if (font_name == NULL) { - string *fn = new string("Unknown"); - f = new XMLFont(fn, size, rgb); - // fn must not be deleted - } else { - f = new XMLFont(font_name, size, rgb); - } + if (font_name == NULL) + font_name = new string("Unknown"); + f = new XMLFont(font_name, size, rgb); return this->add_font(f); } From 3e1a43e86a50f06d7f71291825b3475db0d73de8 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 4 Apr 2011 11:00:20 -0600 Subject: [PATCH 06/57] ... --- src/calibre/ebooks/pdf/fonts.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/calibre/ebooks/pdf/fonts.cpp b/src/calibre/ebooks/pdf/fonts.cpp index c5261298ff..c3a709869e 100644 --- a/src/calibre/ebooks/pdf/fonts.cpp +++ b/src/calibre/ebooks/pdf/fonts.cpp @@ -138,6 +138,7 @@ Fonts::size_type Fonts::add_font(string* font_name, double size, GfxRGB rgb) { XMLFont *f = NULL; if (font_name == NULL) font_name = new string("Unknown"); + // font_name must not be deleted f = new XMLFont(font_name, size, rgb); return this->add_font(f); From d1859b0f784e972e0ff8af16e7b1afbb9f455c4d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 4 Apr 2011 11:14:12 -0600 Subject: [PATCH 07/57] ... --- src/calibre/ebooks/metadata/sources/base.py | 1 + .../ebooks/metadata/sources/identify.py | 194 +++++++++--------- 2 files changed, 100 insertions(+), 95 deletions(-) diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py index 08012c3ee8..d306a02bcb 100644 --- a/src/calibre/ebooks/metadata/sources/base.py +++ b/src/calibre/ebooks/metadata/sources/base.py @@ -21,6 +21,7 @@ msprefs = JSONConfig('metadata_sources.json') msprefs.defaults['txt_comments'] = False msprefs.defaults['ignore_fields'] = [] msprefs.defaults['max_tags'] = 10 +msprefs.defaults['wait_after_first_identify_result'] = 30 # seconds def create_log(ostream=None): log = ThreadSafeLog(level=ThreadSafeLog.DEBUG) diff --git a/src/calibre/ebooks/metadata/sources/identify.py b/src/calibre/ebooks/metadata/sources/identify.py index ab86e8ffa2..87d34c0bff 100644 --- a/src/calibre/ebooks/metadata/sources/identify.py +++ b/src/calibre/ebooks/metadata/sources/identify.py @@ -21,9 +21,7 @@ from calibre.ebooks.metadata.book.base import Metadata from calibre.utils.date import utc_tz from calibre.utils.html2text import html2text -# How long to wait for more results after first result is found -WAIT_AFTER_FIRST_RESULT = 30 # seconds - +# Download worker {{{ class Worker(Thread): def __init__(self, plugin, kwargs, abort): @@ -47,99 +45,9 @@ def is_worker_alive(workers): return True return False -def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30): - start_time = time.time() - plugins = list(metadata_plugins['identify']) - - kwargs = { - 'title': title, - 'authors': authors, - 'identifiers': identifiers, - 'timeout': timeout, - } - - log('Running identify query with parameters:') - log(kwargs) - log('Using plugins:', ', '.join([p.name for p in plugins])) - log('The log (if any) from individual plugins is below') - - workers = [Worker(p, kwargs, abort) for p in plugins] - for w in workers: - w.start() - - first_result_at = None - results = dict.fromkeys(plugins, []) - - def get_results(): - found = False - for w in workers: - try: - result = w.rq.get_nowait() - except Empty: - pass - else: - results[w.plugin].append(result) - found = True - return found - - while True: - time.sleep(0.2) - - if get_results() and first_result_at is None: - first_result_at = time.time() - - if not is_worker_alive(workers): - break - - if (first_result_at is not None and time.time() - first_result_at < - WAIT_AFTER_FIRST_RESULT): - log('Not waiting any longer for more results') - abort.set() - break - - get_results() - sort_kwargs = dict(kwargs) - for k in list(sort_kwargs.iterkeys()): - if k not in ('title', 'authors', 'identifiers'): - sort_kwargs.pop(k) - - for plugin, results in results.iteritems(): - results.sort(key=plugin.identify_results_keygen(**sort_kwargs)) - plog = plugin.buf.getvalue().strip() - if plog: - log('\n'+'*'*35, plugin.name, '*'*35) - log('Found %d results'%len(results)) - log(plog) - log('\n'+'*'*80) - - for i, result in enumerate(results): - result.relevance_in_source = i - result.has_cached_cover_url = \ - plugin.get_cached_cover_url(result.identifiers) is not None - result.identify_plugin = plugin - - log('The identify phase took %.2f seconds'%(time.time() - start_time)) - log('Merging results from different sources and finding earliest', - 'publication dates') - start_time = time.time() - results = merge_identify_results(results, log) - log('We have %d merged results, merging took: %.2f seconds' % - (len(results), time.time() - start_time)) - - if msprefs['txt_comments']: - for r in results: - if r.plugin.has_html_comments and r.comments: - r.comments = html2text(r.comments) - - dummy = Metadata(_('Unknown')) - max_tags = msprefs['max_tags'] - for f in msprefs['ignore_fields']: - for r in results: - setattr(r, f, getattr(dummy, f)) - r.tags = r.tags[:max_tags] - - return results +# }}} +# Merge results from different sources {{{ class ISBNMerge(object): @@ -298,6 +206,102 @@ def merge_identify_results(result_map, log): return isbn_merge.finalize() +# }}} + +def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30): + start_time = time.time() + plugins = list(metadata_plugins['identify']) + + kwargs = { + 'title': title, + 'authors': authors, + 'identifiers': identifiers, + 'timeout': timeout, + } + + log('Running identify query with parameters:') + log(kwargs) + log('Using plugins:', ', '.join([p.name for p in plugins])) + log('The log (if any) from individual plugins is below') + + workers = [Worker(p, kwargs, abort) for p in plugins] + for w in workers: + w.start() + + first_result_at = None + results = dict.fromkeys(plugins, []) + + def get_results(): + found = False + for w in workers: + try: + result = w.rq.get_nowait() + except Empty: + pass + else: + results[w.plugin].append(result) + found = True + return found + + wait_time = msprefs['wait_after_first_identify_result'] + while True: + time.sleep(0.2) + + if get_results() and first_result_at is None: + first_result_at = time.time() + + if not is_worker_alive(workers): + break + + if (first_result_at is not None and time.time() - first_result_at < + wait_time): + log('Not waiting any longer for more results') + abort.set() + break + + get_results() + sort_kwargs = dict(kwargs) + for k in list(sort_kwargs.iterkeys()): + if k not in ('title', 'authors', 'identifiers'): + sort_kwargs.pop(k) + + for plugin, results in results.iteritems(): + results.sort(key=plugin.identify_results_keygen(**sort_kwargs)) + plog = plugin.buf.getvalue().strip() + if plog: + log('\n'+'*'*35, plugin.name, '*'*35) + log('Found %d results'%len(results)) + log(plog) + log('\n'+'*'*80) + + for i, result in enumerate(results): + result.relevance_in_source = i + result.has_cached_cover_url = \ + plugin.get_cached_cover_url(result.identifiers) is not None + result.identify_plugin = plugin + + log('The identify phase took %.2f seconds'%(time.time() - start_time)) + log('Merging results from different sources and finding earliest', + 'publication dates') + start_time = time.time() + results = merge_identify_results(results, log) + log('We have %d merged results, merging took: %.2f seconds' % + (len(results), time.time() - start_time)) + + if msprefs['txt_comments']: + for r in results: + if r.plugin.has_html_comments and r.comments: + r.comments = html2text(r.comments) + + dummy = Metadata(_('Unknown')) + max_tags = msprefs['max_tags'] + for f in msprefs['ignore_fields']: + for r in results: + setattr(r, f, getattr(dummy, f)) + r.tags = r.tags[:max_tags] + + return results + From ac3693cfdc586b6c3f89bb5841d6fc881d3c6b7c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 4 Apr 2011 11:36:26 -0600 Subject: [PATCH 08/57] Conversion pipeline: Handle inline + + +
+ %%s +
+ + + '''%(f, c) + self.setHtml(templ%html) +# }}} + +class IdentifyWidget(QWidget): + + def __init__(self, parent=None): + QWidget.__init__(self, parent) + + self.l = l = QGridLayout() + self.setLayout(l) + + names = [''+p.name+'' for p in metadata_plugins(['identify']) if + p.is_configured()] + self.top = QLabel('

'+_('calibre is downloading metadata from: ') + + ', '.join(names)) + self.top.setWordWrap(True) + l.addWidget(self.top, 0, 0) + + self.results_view = ResultsView(self) + l.addWidget(self.results_view, 1, 0) + + self.comments_view = Comments(self) + l.addWidget(self.comments_view, 1, 1) + + self.query = QLabel('download starting...') + f = self.query.font() + f.setPointSize(f.pointSize()-2) + self.query.setFont(f) + self.query.setWordWrap(True) + l.addWidget(self.query, 2, 0, 1, 2) + + def start(self, title=None, authors=None, identifiers={}): + parts = [] + if title: + parts.append('title:'+title) + if authors: + parts.append('authors:'+authors_to_string(authors)) + if identifiers: + x = ', '.join('%s:%s'%(k, v) for k, v in identifiers) + parts.append(x) + self.query.setText(_('Query: ')+'; '.join(parts)) + self.comments_view.show_data('

'+_('Downloading, please wait')+ + '.

'+ + ''' + + ''') + +class FullFetch(QDialog): # {{{ + + def __init__(self, parent=None): + QDialog.__init__(self, parent) + + self.setWindowTitle(_('Downloading metadata...')) + self.setWindowIcon(QIcon(I('metadata.png'))) + + self.stack = QStackedWidget() + self.l = l = QVBoxLayout() + self.setLayout(l) + l.addWidget(self.stack) + + self.bb = QDialogButtonBox(QDialogButtonBox.Cancel) + l.addWidget(self.bb) + self.bb.rejected.connect(self.reject) + + self.identify_widget = IdentifyWidget(self) + self.stack.addWidget(self.identify_widget) + self.resize(850, 500) + + def accept(self): + # Prevent pressing Enter from closing the dialog + pass + + def start(self, title=None, authors=None, identifiers={}): + self.identify_widget.start(title=title, authors=authors, + identifiers=identifiers) + self.exec_() +# }}} + +if __name__ == '__main__': + app = QApplication([]) + d = FullFetch() + d.start(title='great gatsby', authors=['Fitzgerald']) + diff --git a/src/calibre/manual/server.rst b/src/calibre/manual/server.rst index 6d1adc88cd..82ec5c2927 100644 --- a/src/calibre/manual/server.rst +++ b/src/calibre/manual/server.rst @@ -16,7 +16,7 @@ Here, we will show you how to integrate the |app| content server into another se Using a reverse proxy ----------------------- -This is the simplest approach as it allows you to use the binary calibre install with no external dependencies/system integration requirements. +A reverse proxy is when your normal server accepts incoming requests and passes them onto the calibre server. It then reads the response from the calibre server and forwards it to the client. This means that you can simply run the calibre server as normal without trying to integrate it closely with your main server, and you can take advantage of whatever authentication systems you main server has in place. This is the simplest approach as it allows you to use the binary calibre install with no external dependencies/system integration requirements. Below, is an example of how to achieve this with Apache as your main server, but it will work with any server that supports Reverse Proxies. First start the |app| content server as shown below:: @@ -33,7 +33,7 @@ The exact technique for enabling the proxy modules will vary depending on your A RewriteRule ^/calibre/(.*) http://localhost:8080/calibre/$1 [proxy] RewriteRule ^/calibre http://localhost:8080 [proxy] -That's all, you will now be able to access the |app| Content Server under the /calibre URL in your apache server. +That's all, you will now be able to access the |app| Content Server under the /calibre URL in your apache server. The above rules pass all requests under /calibre to the calibre server running on port 8080 and thanks to the --url-prefix option above, the calibre server handles them transparently. .. note:: If you are willing to devote an entire VirtualHost to the content server, then there is no need to use --url-prefix and RewriteRule, instead just use the ProxyPass directive. From fc1e9175fcb40d95c701f4b2d8a3c1025c4c2aad Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 6 Apr 2011 22:00:53 -0600 Subject: [PATCH 55/57] Fix some server settings not being applied when clicking start server in Preferences->Sharing over the net --- src/calibre/gui2/preferences/server.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/calibre/gui2/preferences/server.py b/src/calibre/gui2/preferences/server.py index 82519f17cd..421dbe737f 100644 --- a/src/calibre/gui2/preferences/server.py +++ b/src/calibre/gui2/preferences/server.py @@ -57,17 +57,8 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): r('autolaunch_server', config) - def set_server_options(self): - c = self.proxy - c.set('port', self.opt_port.value()) - c.set('username', unicode(self.opt_username.text()).strip()) - p = unicode(self.opt_password.text()).strip() - if not p: - p = None - c.set('password', p) - def start_server(self): - self.set_server_options() + ConfigWidgetBase.commit(self) self.gui.start_content_server(check_started=False) while not self.gui.content_server.is_running and self.gui.content_server.exception is None: time.sleep(1) From 2befe1eb584186f7cff24088e7a1a0edd2ee3b74 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 6 Apr 2011 22:53:08 -0600 Subject: [PATCH 56/57] ... --- src/calibre/gui2/metadata/single_download.py | 91 +++++++++++++++----- src/calibre/utils/logging.py | 54 ++++++++++-- 2 files changed, 116 insertions(+), 29 deletions(-) diff --git a/src/calibre/gui2/metadata/single_download.py b/src/calibre/gui2/metadata/single_download.py index be521b6000..426d0b9e78 100644 --- a/src/calibre/gui2/metadata/single_download.py +++ b/src/calibre/gui2/metadata/single_download.py @@ -7,6 +7,8 @@ __license__ = 'GPL v3' __copyright__ = '2011, Kovid Goyal ' __docformat__ = 'restructuredtext en' +from threading import Thread, Event + from PyQt4.Qt import (QStyledItemDelegate, QTextDocument, QRectF, QIcon, Qt, QStyle, QApplication, QDialog, QVBoxLayout, QLabel, QDialogButtonBox, QStackedWidget, QWidget, QTableView, QGridLayout, QFontInfo, QPalette) @@ -14,6 +16,18 @@ from PyQt4.QtWebKit import QWebView from calibre.customize.ui import metadata_plugins from calibre.ebooks.metadata import authors_to_string +from calibre.utils.logging import ThreadSafeLog, UnicodeHTMLStream +from calibre.ebooks.metadata.sources.identify import identify + +class Log(ThreadSafeLog): # {{{ + + def __init__(self): + ThreadSafeLog.__init__(self, level=self.DEBUG) + self.outputs = [UnicodeHTMLStream()] + + def clear(self): + self.outputs[0].clear() +# }}} class RichTextDelegate(QStyledItemDelegate): # {{{ @@ -95,10 +109,35 @@ class Comments(QWebView): # {{{ self.setHtml(templ%html) # }}} +class IdentifyWorker(Thread): + + def __init__(self, log, abort, title, authors, identifiers): + Thread.__init__(self) + self.daemon = True + + self.log, self.abort = log, abort + self.title, self.authors, self.identifiers = (title, authors. + identifiers) + + self.results = [] + self.error = None + + def run(self): + try: + self.results = identify(self.log, self.abort, title=self.title, + authors=self.authors, identifiers=self.identifiers) + for i, result in enumerate(self.results): + result.gui_rank = i + except: + import traceback + self.error = traceback.format_exc() + class IdentifyWidget(QWidget): - def __init__(self, parent=None): + def __init__(self, log, parent=None): QWidget.__init__(self, parent) + self.log = log + self.abort = Event() self.l = l = QGridLayout() self.setLayout(l) @@ -123,7 +162,27 @@ class IdentifyWidget(QWidget): self.query.setWordWrap(True) l.addWidget(self.query, 2, 0, 1, 2) + self.comments_view.show_data('

'+_('Downloading')+ + '
.

'+ + ''' + + ''') + def start(self, title=None, authors=None, identifiers={}): + self.log.clear() + self.log('Starting download') parts = [] if title: parts.append('title:'+title) @@ -133,28 +192,18 @@ class IdentifyWidget(QWidget): x = ', '.join('%s:%s'%(k, v) for k, v in identifiers) parts.append(x) self.query.setText(_('Query: ')+'; '.join(parts)) - self.comments_view.show_data('

'+_('Downloading, please wait')+ - '.

'+ - ''' - - ''') + self.log(unicode(self.query.text())) + + self.worker = IdentifyWorker(self.log, self.abort, self.title, + self.authors, self.identifiers) + + # self.worker.start() class FullFetch(QDialog): # {{{ - def __init__(self, parent=None): + def __init__(self, log, parent=None): QDialog.__init__(self, parent) + self.log = log self.setWindowTitle(_('Downloading metadata...')) self.setWindowIcon(QIcon(I('metadata.png'))) @@ -168,7 +217,7 @@ class FullFetch(QDialog): # {{{ l.addWidget(self.bb) self.bb.rejected.connect(self.reject) - self.identify_widget = IdentifyWidget(self) + self.identify_widget = IdentifyWidget(log, self) self.stack.addWidget(self.identify_widget) self.resize(850, 500) @@ -184,6 +233,6 @@ class FullFetch(QDialog): # {{{ if __name__ == '__main__': app = QApplication([]) - d = FullFetch() + d = FullFetch(Log()) d.start(title='great gatsby', authors=['Fitzgerald']) diff --git a/src/calibre/utils/logging.py b/src/calibre/utils/logging.py index f4b2e6f0b6..45e21ded39 100644 --- a/src/calibre/utils/logging.py +++ b/src/calibre/utils/logging.py @@ -14,7 +14,7 @@ import sys, traceback, cStringIO from functools import partial from threading import RLock - +from calibre import isbytestring, force_unicode, as_unicode class Stream(object): @@ -63,15 +63,16 @@ class FileStream(Stream): class HTMLStream(Stream): + color = { + DEBUG: '', + INFO:'', + WARN: '', + ERROR: '' + } + normal = '' + def __init__(self, stream=sys.stdout): Stream.__init__(self, stream) - self.color = { - DEBUG: '', - INFO:'', - WARN: '', - ERROR: '' - } - self.normal = '' def prints(self, level, *args, **kwargs): self.stream.write(self.color[level]) @@ -82,6 +83,43 @@ class HTMLStream(Stream): def flush(self): self.stream.flush() +class UnicodeHTMLStream(HTMLStream): + + def __init__(self): + self.clear() + + def flush(self): + pass + + def prints(self, level, *args, **kwargs): + col = self.color[level] + if col != self.last_col: + if self.data: + self.data.append(self.normal) + self.data.append(col) + self.last_col = col + + sep = kwargs.get(u'sep', u' ') + end = kwargs.get(u'end', u'\n') + + for arg in args: + if isbytestring(arg): + arg = force_unicode(arg) + elif not isinstance(arg, unicode): + arg = as_unicode(arg) + self.data.append(arg+sep) + self.data.append(end) + + def clear(self): + self.data = [] + self.last_col = self.color[INFO] + + @property + def html(self): + end = self.normal if self.data else u'' + return u''.join(self.data) + end + + class Log(object): DEBUG = DEBUG From 011403978718034d2817e19ce0b91a20fc766f76 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 6 Apr 2011 22:54:15 -0600 Subject: [PATCH 57/57] ... --- src/calibre/gui2/metadata/single_download.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/calibre/gui2/metadata/single_download.py b/src/calibre/gui2/metadata/single_download.py index 426d0b9e78..049ac611c5 100644 --- a/src/calibre/gui2/metadata/single_download.py +++ b/src/calibre/gui2/metadata/single_download.py @@ -116,7 +116,7 @@ class IdentifyWorker(Thread): self.daemon = True self.log, self.abort = log, abort - self.title, self.authors, self.identifiers = (title, authors. + self.title, self.authors, self.identifiers = (title, authors, identifiers) self.results = [] @@ -194,8 +194,8 @@ class IdentifyWidget(QWidget): self.query.setText(_('Query: ')+'; '.join(parts)) self.log(unicode(self.query.text())) - self.worker = IdentifyWorker(self.log, self.abort, self.title, - self.authors, self.identifiers) + self.worker = IdentifyWorker(self.log, self.abort, title, + authors, identifiers) # self.worker.start()