From 4baab972aba634db2bcc3f97e43cd49e111a8aad Mon Sep 17 00:00:00 2001 From: Li Fanxi Date: Mon, 27 Sep 2010 18:48:55 +0800 Subject: [PATCH 1/5] Add Douban.com cover plugin --- src/calibre/customize/builtins.py | 4 +- src/calibre/ebooks/metadata/covers.py | 64 +++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 2 deletions(-) diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index ec9f7e2bc2..5fd51de38c 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -469,14 +469,14 @@ from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \ LibraryThing from calibre.ebooks.metadata.douban import DoubanBooks from calibre.ebooks.metadata.covers import OpenLibraryCovers, \ - LibraryThingCovers + LibraryThingCovers, DoubanCovers from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX from calibre.ebooks.epub.fix.unmanifested import Unmanifested from calibre.ebooks.epub.fix.epubcheck import Epubcheck plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon, LibraryThing, DoubanBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested, - Epubcheck, OpenLibraryCovers, LibraryThingCovers] + Epubcheck, OpenLibraryCovers, LibraryThingCovers, DoubanCovers] plugins += [ ComicInput, EPUBInput, diff --git a/src/calibre/ebooks/metadata/covers.py b/src/calibre/ebooks/metadata/covers.py index b05444c1c6..bef19b4db7 100644 --- a/src/calibre/ebooks/metadata/covers.py +++ b/src/calibre/ebooks/metadata/covers.py @@ -9,6 +9,7 @@ import traceback, socket, re, sys from functools import partial from threading import Thread, Event from Queue import Queue, Empty +from lxml import etree import mechanize @@ -216,6 +217,69 @@ def download_covers(mi, result_queue, max_covers=50, timeout=5.): # {{{ # }}} +class DoubanCovers(CoverDownload): # {{{ + 'Download covers from Douban.com' + + DOUBAN_ISBN_URL = 'http://api.douban.com/book/subject/isbn/' + CALIBRE_DOUBAN_API_KEY = 
'0bd1672394eb1ebf2374356abec15c3d' + name = 'Douban.com covers' + description = _('Download covers from Douban.com') + author = 'Li Fanxi' + + def get_cover_url(self, isbn, br, timeout=5.): + try: + url = self.DOUBAN_ISBN_URL + isbn + "?apikey=" + self.CALIBRE_DOUBAN_API_KEY + src = br.open(url, timeout=timeout).read() + except Exception, err: + if isinstance(getattr(err, 'args', [None])[0], socket.timeout): + err = Exception(_('Douban.com API timed out. Try again later.')) + raise err + else: + feed = etree.fromstring(src) + NAMESPACES = { + 'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/', + 'atom' : 'http://www.w3.org/2005/Atom', + 'db': 'http://www.douban.com/xmlns/' + } + XPath = partial(etree.XPath, namespaces=NAMESPACES) + entries = XPath('//atom:entry')(feed) + if len(entries) < 1: + return None + try: + cover_url = XPath("descendant::atom:link[@rel='image']/attribute::href") + u = cover_url(entries[0])[0].replace('/spic/', '/lpic/'); + # If URL contains "book-default", the book doesn't have a cover + if u.find('book-default') != -1: + return None + except: + return None + return u + + def has_cover(self, mi, ans, timeout=5.): + print "has_cover called" + if not mi.isbn: + return False + br = browser() + try: + if self.get_cover_url(mi.isbn, br, timeout=timeout) != None: + self.debug('cover for', mi.isbn, 'found') + ans.set() + except Exception, e: + self.debug(e) + + def get_covers(self, mi, result_queue, abort, timeout=5.): + if not mi.isbn: + return + br = browser() + try: + url = self.get_cover_url(mi.isbn, br, timeout=timeout) + cover_data = br.open_novisit(url).read() + result_queue.put((True, cover_data, 'jpg', self.name)) + except Exception, e: + result_queue.put((False, self.exception_to_string(e), + traceback.format_exc(), self.name)) +# }}} + def download_cover(mi, timeout=5.): # {{{ results = Queue() download_covers(mi, results, max_covers=1, timeout=timeout) From 04187001a638427a984065dce13c8b56cc60d736 Mon Sep 17 00:00:00 2001 From: Li 
Fanxi Date: Mon, 27 Sep 2010 18:56:09 +0800 Subject: [PATCH 2/5] Removed an unnecessary debug print --- src/calibre/ebooks/metadata/covers.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/calibre/ebooks/metadata/covers.py b/src/calibre/ebooks/metadata/covers.py index bef19b4db7..2c071dbbc9 100644 --- a/src/calibre/ebooks/metadata/covers.py +++ b/src/calibre/ebooks/metadata/covers.py @@ -256,7 +256,6 @@ class DoubanCovers(CoverDownload): # {{{ return u def has_cover(self, mi, ans, timeout=5.): - print "has_cover called" if not mi.isbn: return False br = browser() From a0382a8d86c20c386b92a9b52a7a08b65279f22d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 28 Sep 2010 09:57:49 -0600 Subject: [PATCH 3/5] Fix #6986 (Updated recipe for Telegraph UK) --- resources/recipes/telegraph_uk.recipe | 49 +++++++++++---------------- 1 file changed, 20 insertions(+), 29 deletions(-) diff --git a/resources/recipes/telegraph_uk.recipe b/resources/recipes/telegraph_uk.recipe index 2c261987b2..f79f0fa50c 100644 --- a/resources/recipes/telegraph_uk.recipe +++ b/resources/recipes/telegraph_uk.recipe @@ -1,6 +1,5 @@ -#!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' +__copyright__ = '2008-2010, Darko Miletic ' ''' telegraph.co.uk ''' @@ -8,14 +7,16 @@ telegraph.co.uk from calibre.web.feeds.news import BasicNewsRecipe class TelegraphUK(BasicNewsRecipe): - title = u'Telegraph.co.uk' + title = 'Telegraph.co.uk' __author__ = 'Darko Miletic and Sujata Raman' description = 'News from United Kingdom' - oldest_article = 7 + oldest_article = 2 + category = 'news, politics, UK' + publisher = 'Telegraph Media Group ltd.' 
max_articles_per_feed = 100 no_stylesheets = True - language = 'en' - + language = 'en_GB' + remove_empty_feeds = True use_embedded_content = False extra_css = ''' @@ -27,13 +28,20 @@ class TelegraphUK(BasicNewsRecipe): .imageExtras{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;} ''' + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } + + keep_only_tags = [ - dict(name='div', attrs={'class':'storyHead'}) - ,dict(name='div', attrs={'class':'story' }) - #,dict(name='div', attrs={'class':['slideshowHD gutterUnder',"twoThirds gutter","caption" ] }) + dict(name='div', attrs={'class':['storyHead','byline']}) + ,dict(name='div', attrs={'id':'mainBodyArea' }) ] - remove_tags = [dict(name='div', attrs={'class':['related_links_inline',"imgindex","next","prev","gutterUnder",'ssImgHide','imageExtras','ssImg hide']}) - #,dict(name='div', attrs={'class':['toolshideoneQuarter']}) + remove_tags = [dict(name='div', attrs={'class':['related_links_inline',"imgindex","next","prev","gutterUnder",'ssImgHide','imageExtras','ssImg hide','related_links_video']}) + ,dict(name='ul' , attrs={'class':['shareThis shareBottom']}) ,dict(name='span', attrs={'class':['num','placeComment']}) ] @@ -51,24 +59,7 @@ class TelegraphUK(BasicNewsRecipe): ] def get_article_url(self, article): - - url = article.get('guid', None) - + url = article.get('link', None) if 'picture-galleries' in url or 'pictures' in url or 'picturegalleries' in url : url = None - return url - - - def postprocess_html(self,soup,first): - - for bylineTag in soup.findAll(name='div', attrs={'class':'byline'}): - for pTag in bylineTag.findAll(name='p'): - if getattr(pTag.contents[0],"Comments",True): - pTag.extract() - return soup - - - - - From fdc171a0acc81c367eb0e626d5f3a41d8f48814b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 28 Sep 2010 13:55:02 -0600 Subject: [PATCH 4/5] Automatically enable the Douban metadata 
download plugins if the user chooses Chinese as the interface language in the welcome wizard --- src/calibre/gui2/wizard/__init__.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/calibre/gui2/wizard/__init__.py b/src/calibre/gui2/wizard/__init__.py index f3234d48d5..8460210cd0 100644 --- a/src/calibre/gui2/wizard/__init__.py +++ b/src/calibre/gui2/wizard/__init__.py @@ -584,6 +584,13 @@ class LibraryPage(QWizardPage, LibraryUI): qt_app.load_translations() self.emit(SIGNAL('retranslate()')) self.init_languages() + try: + if prefs['language'].lower().startswith('zh'): + from calibre.customize.ui import enable_plugin + for name in ('Douban Books', 'Douban.com covers'): + enable_plugin(name) + except: + pass def change(self): dir = choose_dir(self, 'database location dialog', From 5fe81f0162680bc3f3c1dd9bbbf8b826980ad8c6 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 28 Sep 2010 14:24:16 -0600 Subject: [PATCH 5/5] Welcome wizard: Prevent the user from choosing a non-empty folder as her calibre library --- src/calibre/gui2/wizard/__init__.py | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/src/calibre/gui2/wizard/__init__.py b/src/calibre/gui2/wizard/__init__.py index 8460210cd0..ef58ec3a90 100644 --- a/src/calibre/gui2/wizard/__init__.py +++ b/src/calibre/gui2/wizard/__init__.py @@ -592,11 +592,34 @@ class LibraryPage(QWizardPage, LibraryUI): except: pass + def is_library_dir_suitable(self, x): + return LibraryDatabase2.exists_at(x) or not os.listdir(x) + + def validatePage(self): + newloc = unicode(self.location.text()) + if not self.is_library_dir_suitable(newloc): + self.show_library_dir_error(newloc) + return False + return True + def change(self): - dir = choose_dir(self, 'database location dialog', + x = choose_dir(self, 'database location dialog', _('Select location for books')) - if dir: - self.location.setText(dir) + if x: + if self.is_library_dir_suitable(x): + self.location.setText(x) + 
else: + self.show_library_dir_error(x) + + def show_library_dir_error(self, x): + if not isinstance(x, unicode): + try: + x = x.decode(filesystem_encoding) + except: + x = unicode(repr(x)) + error_dialog(self, _('Bad location'), + _('You must choose an empty folder for ' + 'the calibre library. %s is not empty.')%x, show=True) def initializePage(self): lp = prefs['library_path']