From 7f85ac4e03e123a264b5c9c6475803971ebeb9b7 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 15 Mar 2011 11:38:43 -0600 Subject: [PATCH 1/9] Cover browser: Try harder to ensure that everything runs in the GUI thread --- src/calibre/gui2/cover_flow.py | 22 +++++++++++++++----- src/calibre/gui2/pictureflow/pictureflow.cpp | 3 ++- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/src/calibre/gui2/cover_flow.py b/src/calibre/gui2/cover_flow.py index cb951b09be..1d79d93bb2 100644 --- a/src/calibre/gui2/cover_flow.py +++ b/src/calibre/gui2/cover_flow.py @@ -53,7 +53,7 @@ if pictureflow is not None: def __init__(self, model, buffer=20): pictureflow.FlowImages.__init__(self) self.model = model - self.model.modelReset.connect(self.reset) + self.model.modelReset.connect(self.reset, type=Qt.QueuedConnection) def count(self): return self.model.count() @@ -83,6 +83,8 @@ if pictureflow is not None: class CoverFlow(pictureflow.PictureFlow): + dc_signal = pyqtSignal() + def __init__(self, parent=None): pictureflow.PictureFlow.__init__(self, parent, config['cover_flow_queue_length']+1) @@ -90,6 +92,8 @@ if pictureflow is not None: self.setFocusPolicy(Qt.WheelFocus) self.setSizePolicy(QSizePolicy(QSizePolicy.Expanding, QSizePolicy.Expanding)) + self.dc_signal.connect(self._data_changed, + type=Qt.QueuedConnection) def sizeHint(self): return self.minimumSize() @@ -101,6 +105,12 @@ if pictureflow is not None: elif ev.delta() > 0: self.showPrevious() + def dataChanged(self): + self.dc_signal.emit() + + def _data_changed(self): + pictureflow.PictureFlow.dataChanged(self) + else: CoverFlow = None @@ -135,8 +145,7 @@ class CoverFlowMixin(object): self.cover_flow = None if CoverFlow is not None: self.cf_last_updated_at = None - self.cover_flow_sync_timer = QTimer(self) - self.cover_flow_sync_timer.timeout.connect(self.cover_flow_do_sync) + self.cover_flow_syncing_enabled = False self.cover_flow_sync_flag = True self.cover_flow = CoverFlow(parent=self) self.cover_flow.currentChanged.connect(self.sync_listview_to_cf) @@ -179,14 +188,15 @@ class CoverFlowMixin(object): self.cover_flow.setFocus(Qt.OtherFocusReason) if CoverFlow is not None: self.cover_flow.setCurrentSlide(self.library_view.currentIndex().row()) - self.cover_flow_sync_timer.start(500) + self.cover_flow_syncing_enabled = True + QTimer.singleShot(500, self.cover_flow_do_sync) self.library_view.setCurrentIndex( self.library_view.currentIndex()) self.library_view.scroll_to_row(self.library_view.currentIndex().row()) def cover_browser_hidden(self): if CoverFlow is not None: - self.cover_flow_sync_timer.stop() + self.cover_flow_syncing_enabled = False idx = self.library_view.model().index(self.cover_flow.currentSlide(), 0) if idx.isValid(): sm = self.library_view.selectionModel() @@ -242,6 +252,8 @@ class CoverFlowMixin(object): except: import traceback traceback.print_exc() + if self.cover_flow_syncing_enabled: + QTimer.singleShot(500, self.cover_flow_do_sync) def sync_listview_to_cf(self, row): self.cf_last_updated_at = time.time() diff --git a/src/calibre/gui2/pictureflow/pictureflow.cpp b/src/calibre/gui2/pictureflow/pictureflow.cpp index 1c63ec410c..1d671154ae 100644 --- a/src/calibre/gui2/pictureflow/pictureflow.cpp +++ b/src/calibre/gui2/pictureflow/pictureflow.cpp @@ -439,7 +439,8 @@ void PictureFlowPrivate::setImages(FlowImages *images) QObject::disconnect(slideImages, SIGNAL(dataChanged()), widget, SLOT(dataChanged())); slideImages = images; dataChanged(); - QObject::connect(slideImages, SIGNAL(dataChanged()), widget, SLOT(dataChanged())); + QObject::connect(slideImages, SIGNAL(dataChanged()), widget, SLOT(dataChanged()), + Qt::QueuedConnection); } int PictureFlowPrivate::slideCount() const From 388859a20975399a9a2d9eb9ac439dc82c49cee6 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 15 Mar 2011 11:47:03 -0600 Subject: [PATCH 2/9] Fix #9409 (Fix for weird char in rbc_ru.recipe) --- resources/recipes/rbc_ru.recipe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resources/recipes/rbc_ru.recipe b/resources/recipes/rbc_ru.recipe index 2495a195dc..438cd73243 100644 --- a/resources/recipes/rbc_ru.recipe +++ b/resources/recipes/rbc_ru.recipe @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# -*- coding: utf-8 -*- from calibre.web.feeds.news import BasicNewsRecipe From 9ed480327fdfb71c77183b8bc1b88fc128e86389 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 15 Mar 2011 11:54:49 -0600 Subject: [PATCH 3/9] Pro-linux.de by Bobus --- resources/recipes/pro_linux_de.recipe | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 resources/recipes/pro_linux_de.recipe diff --git a/resources/recipes/pro_linux_de.recipe b/resources/recipes/pro_linux_de.recipe new file mode 100644 index 0000000000..c10c2ec047 --- /dev/null +++ b/resources/recipes/pro_linux_de.recipe @@ -0,0 +1,15 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1295265555(BasicNewsRecipe): + title = u'Pro-Linux.de' + language = 'de' + __author__ = 'Bobus' + oldest_article = 3 + max_articles_per_feed = 100 + + feeds = [(u'Pro-Linux', u'http://www.pro-linux.de/backend/pro-linux.rdf')] + + def print_version(self, url): + return url.replace('/news/1/', '/news/1/print/').replace('/artikel/2/', '/artikel/2/print/') + + remove_tags_after = [dict(name='div', attrs={'class':'print_links'})] From 4511e1e178d393c08a7f86cd641c72ec7e94fa0e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 15 Mar 2011 13:30:52 -0600 Subject: [PATCH 4/9] Explicitly run a garbage collection after switching a library to ensure that it does not freeze the interface at a later time --- src/calibre/gui2/ui.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/calibre/gui2/ui.py b/src/calibre/gui2/ui.py index a2ec8c9846..6766635789 100644 --- a/src/calibre/gui2/ui.py +++ b/src/calibre/gui2/ui.py @@ -463,6 +463,10 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{ self.card_a_view.reset() self.card_b_view.reset() self.device_manager.set_current_library_uuid(db.library_id) + # Run a garbage collection now so that it does not freeze the + # interface later + import gc + gc.collect() def set_window_title(self): From b7a92e7e3ebe94d0726319313a47158268abf556 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 15 Mar 2011 13:31:14 -0600 Subject: [PATCH 5/9] Fix #9407 (Metadata read failure from particular Kindle (Mobipocket) ebook) --- src/calibre/ebooks/metadata/opf2.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/metadata/opf2.py b/src/calibre/ebooks/metadata/opf2.py index 9c59692628..846fdf1322 100644 --- a/src/calibre/ebooks/metadata/opf2.py +++ b/src/calibre/ebooks/metadata/opf2.py @@ -1251,6 +1251,7 @@ def metadata_to_opf(mi, as_string=True): from lxml import etree import textwrap from calibre.ebooks.oeb.base import OPF, DC + from calibre.utils.cleantext import clean_ascii_chars if not mi.application_id: mi.application_id = str(uuid.uuid4()) @@ -1306,7 +1307,7 @@ def metadata_to_opf(mi, as_string=True): if hasattr(mi, 'category') and mi.category: factory(DC('type'), mi.category) if mi.comments: - factory(DC('description'), mi.comments) + factory(DC('description'), clean_ascii_chars(mi.comments)) if mi.publisher: factory(DC('publisher'), mi.publisher) for key, val in mi.get_identifiers().iteritems(): From 2f4876f4742a72095093d6220effef8a10739a2c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 15 Mar 2011 14:39:03 -0600 Subject: [PATCH 6/9] Beginning of the new amazon metadata download plugin --- src/calibre/__init__.py | 13 +- src/calibre/customize/builtins.py | 3 +- src/calibre/ebooks/metadata/sources/amazon.py | 221 +++++++++++++++++- src/calibre/ebooks/metadata/sources/base.py | 27 ++- src/calibre/ebooks/metadata/sources/google.py | 6 +- 5 files changed, 260 insertions(+), 10 deletions(-) diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index fa9a8f2404..ab578d8ae6 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -3,7 +3,7 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import uuid, sys, os, re, logging, time, \ +import uuid, sys, os, re, logging, time, random, \ __builtin__, warnings, multiprocessing from urllib import getproxies __builtin__.__dict__['dynamic_property'] = lambda(func): func(None) @@ -268,6 +268,17 @@ def get_parsed_proxy(typ='http', debug=True): prints('Using http proxy', str(ans)) return ans +def random_user_agent(): + choices = [ + 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2.11) Gecko/20101012 Firefox/3.6.11' + 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)' + 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)' + 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1)' + 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.2.153.1 Safari/525.19' + 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2.11) Gecko/20101012 Firefox/3.6.11' + ] + return choices[random.randint(0, len(choices)-1)] + def browser(honor_time=True, max_time=2, mobile_browser=False, user_agent=None): ''' diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index b24a5c9a17..b3d435165b 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -1031,7 +1031,8 @@ plugins += [LookAndFeel, Behavior, Columns, Toolbar, Search, InputOptions, # New metadata download plugins {{{ from calibre.ebooks.metadata.sources.google import GoogleBooks +from calibre.ebooks.metadata.sources.amazon import Amazon -plugins += [GoogleBooks] +plugins += [GoogleBooks, Amazon] # }}} diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py index 88ac1213c5..cf09a88338 100644 --- a/src/calibre/ebooks/metadata/sources/amazon.py +++ b/src/calibre/ebooks/metadata/sources/amazon.py @@ -7,16 +7,231 @@ __license__ = 'GPL v3' __copyright__ = '2011, Kovid Goyal ' __docformat__ = 'restructuredtext en' +import socket, time +from urllib import urlencode +from threading import Thread +from lxml.html import soupparser, tostring + +from calibre import as_unicode +from calibre.ebooks.metadata import check_isbn from calibre.ebooks.metadata.sources.base import Source +from calibre.utils.cleantext import clean_ascii_chars +from calibre.ebooks.chardet import xml_to_unicode + +class Worker(Thread): + + def __init__(self, url, result_queue, browser, log, timeout=10): + self.url, self.result_queue = url, result_queue + self.log, self.timeout = log, timeout + self.browser = browser.clone_browser() + self.cover_url = self.amazon_id = None + + def run(self): + try: + self.get_details() + except: + self.log.error('get_details failed for url: %r'%self.url) + + def get_details(self): + try: + raw = self.browser.open_novisit(self.url, timeout=self.timeout).read().strip() + except Exception, e: + if callable(getattr(e, 'getcode', None)) and \ + e.getcode() == 404: + self.log.error('URL malformed: %r'%self.url) + return + attr = getattr(e, 'args', [None]) + attr = attr if attr else [None] + if isinstance(attr[0], socket.timeout): + msg = 'Amazon timed out. Try again later.' + self.log.error(msg) + else: + msg = 'Failed to make details query: %r'%self.url + self.log.exception(msg) + return + + raw = xml_to_unicode(raw, strip_encoding_pats=True, + resolve_entities=True)[0] + + if '404 - ' in raw: + self.log.error('URL malformed: %r'%self.url) + return + + try: + root = soupparser.fromstring(clean_ascii_chars(raw)) + except: + msg = 'Failed to parse amazon details page: %r'%self.url + self.log.exception(msg) + return + + errmsg = root.xpath('//*[@id="errorMessage"]') + if errmsg: + msg = 'Failed to parse amazon details page: %r'%self.url + msg += tostring(errmsg, method='text', encoding=unicode).strip() + self.log.error(msg) + return + + self.parse_details(root) + + def parse_details(self, root): + pass + class Amazon(Source): name = 'Amazon' description = _('Downloads metadata from Amazon') - capabilities = frozenset(['identify', 'cover']) - touched_fields = frozenset(['title', 'authors', 'isbn', 'pubdate', - 'comments', 'cover_data']) + capabilities = frozenset(['identify']) + touched_fields = frozenset(['title', 'authors', 'isbn', 'pubdate', 'comments']) + + AMAZON_DOMAINS = { + 'com': _('US'), + 'fr' : _('France'), + 'de' : _('Germany'), + } + + def create_query(self, log, title=None, authors=None, identifiers={}): + domain = self.prefs.get('domain', 'com') + + # See the amazon detailed search page to get all options + q = { 'search-alias' : 'aps', + 'unfiltered' : '1', + } + + if domain == 'com': + q['sort'] = 'relevanceexprank' + else: + q['sort'] = 'relevancerank' + + asin = identifiers.get('amazon', None) + isbn = check_isbn(identifiers.get('isbn', None)) + + if asin is not None: + q['field-keywords'] = asin + elif isbn is not None: + q['field-isbn'] = isbn + else: + # Only return book results + q['search-alias'] = 'stripbooks' + if title: + title_tokens = list(self.get_title_tokens(title)) + if title_tokens: + q['field-title'] = ' '.join(title_tokens) + if authors: + author_tokens = self.get_author_tokens(authors, + only_first_author=True) + if author_tokens: + q['field-author'] = ' '.join(author_tokens) + + if not ('field-keywords' in q or 'field-isbn' in q or + ('field-title' in q and 'field-author' in q)): + # Insufficient metadata to make an identify query + return None + + utf8q = dict([(x.encode('utf-8'), y.encode('utf-8')) for x, y in + q.iteritems()]) + url = 'http://www.amazon.%s/s/?'%domain + urlencode(utf8q) + return url + + + def identify(self, log, result_queue, abort, title=None, authors=None, + identifiers={}, timeout=10): + query = self.create_query(log, title=title, authors=authors, + identifiers=identifiers) + if query is None: + log.error('Insufficient metadata to construct query') + return + br = self.browser + try: + raw = br.open_novisit(query, timeout=timeout).read().strip() + except Exception, e: + if callable(getattr(e, 'getcode', None)) and \ + e.getcode() == 404: + log.error('Query malformed: %r'%query) + return + attr = getattr(e, 'args', [None]) + attr = attr if attr else [None] + if isinstance(attr[0], socket.timeout): + msg = _('Amazon timed out. Try again later.') + log.error(msg) + else: + msg = 'Failed to make identify query: %r'%query + log.exception(msg) + return as_unicode(msg) + + + raw = xml_to_unicode(raw, strip_encoding_pats=True, + resolve_entities=True)[0] + + if '<title>404 - ' in raw: + log.error('No matches found for query: %r'%query) + return + + try: + root = soupparser.fromstring(clean_ascii_chars(raw)) + except: + msg = 'Failed to parse amazon page for query: %r'%query + log.exception(msg) + return msg + + errmsg = root.xpath('//*[@id="errorMessage"]') + if errmsg: + msg = tostring(errmsg, method='text', encoding=unicode).strip() + log.error(msg) + # The error is almost always a not found error + return + + matches = [] + for div in root.xpath(r'//div[starts-with(@id, "result_")]'): + for a in div.xpath(r'descendant::a[@class="title" and @href]'): + title = tostring(a, method='text', encoding=unicode).lower() + if 'bulk pack' not in title: + matches.append(a.get('href')) + break + + # Keep only the top 5 matches as the matches are sorted by relevance by + # Amazon so lower matches are not likely to be very relevant + matches = matches[:5] + + if not matches: + log.error('No matches found with query: %r'%query) + return + + workers = [Worker(url, result_queue, br, log) for url in matches] + + for w in workers: + w.start() + # Don't send all requests at the same time + time.sleep(0.1) + + while not abort.is_set(): + a_worker_is_alive = False + for w in workers: + w.join(0.2) + if abort.is_set(): + break + if w.is_alive(): + a_worker_is_alive = True + if not a_worker_is_alive: + break + + return None + + +if __name__ == '__main__': + # To run these test use: calibre-debug -e + # src/calibre/ebooks/metadata/sources/amazon.py + from calibre.ebooks.metadata.sources.test import (test_identify_plugin, + title_test) + test_identify_plugin(Amazon.name, + [ + + ( + {'identifiers':{'isbn': '0743273567'}}, + [title_test('The great gatsby', exact=True)] + ), + ]) diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py index 142224c599..523d012cd5 100644 --- a/src/calibre/ebooks/metadata/sources/base.py +++ b/src/calibre/ebooks/metadata/sources/base.py @@ -9,8 +9,12 @@ __docformat__ = 'restructuredtext en' import re, threading +from calibre import browser, random_user_agent from calibre.customize import Plugin from calibre.utils.logging import ThreadSafeLog, FileStream +from calibre.utils.config import JSONConfig + +msprefs = JSONConfig('metadata_sources.json') def create_log(ostream=None): log = ThreadSafeLog(level=ThreadSafeLog.DEBUG) @@ -24,8 +28,6 @@ class Source(Plugin): supported_platforms = ['windows', 'osx', 'linux'] - result_of_identify_is_complete = True - capabilities = frozenset() touched_fields = frozenset() @@ -34,6 +36,27 @@ class Source(Plugin): Plugin.__init__(self, *args, **kwargs) self._isbn_to_identifier_cache = {} self.cache_lock = threading.RLock() + self._config_obj = None + self._browser = None + + # Configuration {{{ + + @property + def prefs(self): + if self._config_obj is None: + self._config_obj = JSONConfig('metadata_sources/%s.json'%self.name) + return self._config_obj + # }}} + + # Browser {{{ + + @property + def browser(self): + if self._browser is None: + self._browser = browser(user_agent=random_user_agent()) + return self._browser + + # }}} # Utility functions {{{ diff --git a/src/calibre/ebooks/metadata/sources/google.py b/src/calibre/ebooks/metadata/sources/google.py index 0720b21ded..b7c300e933 100644 --- a/src/calibre/ebooks/metadata/sources/google.py +++ b/src/calibre/ebooks/metadata/sources/google.py @@ -19,7 +19,7 @@ from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.chardet import xml_to_unicode from calibre.utils.date import parse_date, utcnow from calibre.utils.cleantext import clean_ascii_chars -from calibre import browser, as_unicode +from calibre import as_unicode NAMESPACES = { 'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/', @@ -150,7 +150,7 @@ class GoogleBooks(Source): def create_query(self, log, title=None, authors=None, identifiers={}): BASE_URL = 'http://books.google.com/books/feeds/volumes?' - isbn = identifiers.get('isbn', None) + isbn = check_isbn(identifiers.get('isbn', None)) q = '' if isbn is not None: q += 'isbn:'+isbn @@ -212,7 +212,7 @@ class GoogleBooks(Source): identifiers={}, timeout=5): query = self.create_query(log, title=title, authors=authors, identifiers=identifiers) - br = browser() + br = self.browser() try: raw = br.open_novisit(query, timeout=timeout).read() except Exception, e: From 428ed899fcd5edb70ffc11bdb3aa63154e87aa75 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Tue, 15 Mar 2011 14:51:22 -0600 Subject: [PATCH 7/9] Conversion pipeline: When detecting chapters/toc links from HTML normalize spaces and increase maximum TOC title length to 1000 characters from 100 characters. Fixes #9363 (Shortening text on generating TOC.) --- src/calibre/ebooks/oeb/transforms/structure.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/oeb/transforms/structure.py b/src/calibre/ebooks/oeb/transforms/structure.py index fc338da692..0d8bdcdf2e 100644 --- a/src/calibre/ebooks/oeb/transforms/structure.py +++ b/src/calibre/ebooks/oeb/transforms/structure.py @@ -81,6 +81,7 @@ class DetectStructure(object): page_break_after = 'display: block; page-break-after: always' for item, elem in self.detected_chapters: text = xml2text(elem).strip() + text = re.sub(r'\s+', ' ', text.strip()) self.log('\tDetected chapter:', text[:50]) if chapter_mark == 'none': continue @@ -137,7 +138,8 @@ class DetectStructure(object): text = elem.get('title', '') if not text: text = elem.get('alt', '') - text = text[:100].strip() + text = re.sub(r'\s+', ' ', text.strip()) + text = text[:1000].strip() id = elem.get('id', 'calibre_toc_%d'%counter) elem.set('id', id) href = '#'.join((item.href, id)) From e937dccaa37118165c09336e57909b3164af8ddc Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Tue, 15 Mar 2011 18:39:14 -0600 Subject: [PATCH 8/9] Disable automatic garbage collection, instead ensure garbage collection runs only in the GUI thread --- src/calibre/gui2/main_window.py | 67 ++++++++++++++++++++++++++++----- src/calibre/gui2/ui.py | 9 +++-- 2 files changed, 63 insertions(+), 13 deletions(-) diff --git a/src/calibre/gui2/main_window.py b/src/calibre/gui2/main_window.py index e068e851c2..ec58dd3856 100644 --- a/src/calibre/gui2/main_window.py +++ b/src/calibre/gui2/main_window.py @@ -1,10 +1,14 @@ +from __future__ import (unicode_literals, division, absolute_import, + print_function) + __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' -import StringIO, traceback, sys -from PyQt4.Qt import QMainWindow, QString, Qt, QFont, QCoreApplication, SIGNAL,\ - QAction, QMenu, QMenuBar, QIcon, pyqtSignal +import StringIO, traceback, sys, gc + +from PyQt4.Qt import QMainWindow, QString, Qt, QFont, QTimer, \ + QAction, QMenu, QMenuBar, QIcon, pyqtSignal, QObject from calibre.gui2.dialogs.conversion_error import ConversionErrorDialog from calibre.utils.config import OptionParser from calibre.gui2 import error_dialog @@ -35,6 +39,53 @@ class DebugWindow(ConversionErrorDialog): def flush(self): pass +class GarbageCollector(QObject): + + ''' + Disable automatic garbage collection and instead collect manually + every INTERVAL milliseconds. + + This is done to ensure that garbage collection only happens in the GUI + thread, as otherwise Qt can crash. + ''' + + INTERVAL = 5000 + + def __init__(self, parent, debug=False): + QObject.__init__(self, parent) + self.debug = debug + + self.timer = QTimer(self) + self.timer.timeout.connect(self.check) + + self.threshold = gc.get_threshold() + gc.disable() + self.timer.start(self.INTERVAL) + #gc.set_debug(gc.DEBUG_SAVEALL) + + def check(self): + #return self.debug_cycles() + l0, l1, l2 = gc.get_count() + if self.debug: + print ('gc_check called:', l0, l1, l2) + if l0 > self.threshold[0]: + num = gc.collect(0) + if self.debug: + print ('collecting gen 0, found:', num, 'unreachable') + if l1 > self.threshold[1]: + num = gc.collect(1) + if self.debug: + print ('collecting gen 1, found:', num, 'unreachable') + if l2 > self.threshold[2]: + num = gc.collect(2) + if self.debug: + print ('collecting gen 2, found:', num, 'unreachable') + + def debug_cycles(self): + gc.collect() + for obj in gc.garbage: + print (obj, repr(obj), type(obj)) + class MainWindow(QMainWindow): ___menu_bar = None @@ -64,19 +115,15 @@ class MainWindow(QMainWindow): quit_action.setMenuRole(QAction.QuitRole) return preferences_action, quit_action - def __init__(self, opts, parent=None): + def __init__(self, opts, parent=None, disable_automatic_gc=False): QMainWindow.__init__(self, parent) - app = QCoreApplication.instance() - if app is not None: - self.connect(app, SIGNAL('unixSignal(int)'), self.unix_signal) + if disable_automatic_gc: + self._gc = GarbageCollector(self, debug=False) if getattr(opts, 'redirect', False): self.__console_redirect = DebugWindow(self) sys.stdout = sys.stderr = self.__console_redirect self.__console_redirect.show() - def unix_signal(self, signal): - print 'Received signal:', repr(signal) - def unhandled_exception(self, type, value, tb): if type == KeyboardInterrupt: self.keyboard_interrupt.emit() diff --git a/src/calibre/gui2/ui.py b/src/calibre/gui2/ui.py index 6766635789..4af8c1ea54 100644 --- a/src/calibre/gui2/ui.py +++ b/src/calibre/gui2/ui.py @@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en' '''The main GUI''' -import collections, os, sys, textwrap, time +import collections, os, sys, textwrap, time, gc from Queue import Queue, Empty from threading import Thread from PyQt4.Qt import Qt, SIGNAL, QTimer, QHelpEvent, QAction, \ @@ -95,7 +95,7 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{ def __init__(self, opts, parent=None, gui_debug=None): - MainWindow.__init__(self, opts, parent) + MainWindow.__init__(self, opts, parent=parent, disable_automatic_gc=True) self.opts = opts self.device_connected = None self.gui_debug = gui_debug @@ -298,6 +298,9 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{ raise self.device_manager.set_current_library_uuid(db.library_id) + # Collect cycles now + gc.collect() + if show_gui and self.gui_debug is not None: info_dialog(self, _('Debug mode'), '<p>' + _('You have started calibre in debug mode. After you ' @@ -399,6 +402,7 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{ elif msg.startswith('refreshdb:'): self.library_view.model().refresh() self.library_view.model().research() + self.tags_view.recount() else: print msg @@ -465,7 +469,6 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{ self.device_manager.set_current_library_uuid(db.library_id) # Run a garbage collection now so that it does not freeze the # interface later - import gc gc.collect() From 19581fbfc6daca006ccd60b19a6eb8abaadf8e77 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Tue, 15 Mar 2011 21:03:03 -0600 Subject: [PATCH 9/9] ... --- src/calibre/ebooks/metadata/sources/amazon.py | 4 ++-- src/calibre/ebooks/metadata/sources/google.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py index cf09a88338..a62a9683cb 100644 --- a/src/calibre/ebooks/metadata/sources/amazon.py +++ b/src/calibre/ebooks/metadata/sources/amazon.py @@ -21,7 +21,7 @@ from calibre.ebooks.chardet import xml_to_unicode class Worker(Thread): - def __init__(self, url, result_queue, browser, log, timeout=10): + def __init__(self, url, result_queue, browser, log, timeout=20): self.url, self.result_queue = url, result_queue self.log, self.timeout = log, timeout self.browser = browser.clone_browser() @@ -137,7 +137,7 @@ class Amazon(Source): def identify(self, log, result_queue, abort, title=None, authors=None, - identifiers={}, timeout=10): + identifiers={}, timeout=20): query = self.create_query(log, title=title, authors=authors, identifiers=identifiers) if query is None: diff --git a/src/calibre/ebooks/metadata/sources/google.py b/src/calibre/ebooks/metadata/sources/google.py index b7c300e933..923062379e 100644 --- a/src/calibre/ebooks/metadata/sources/google.py +++ b/src/calibre/ebooks/metadata/sources/google.py @@ -209,7 +209,7 @@ class GoogleBooks(Source): break def identify(self, log, result_queue, abort, title=None, authors=None, - identifiers={}, timeout=5): + identifiers={}, timeout=20): query = self.create_query(log, title=title, authors=authors, identifiers=identifiers) br = self.browser()