Add plugin to download metadata from douban.com (disabled by default). Add support for disabling plugins by default. Also ignore device plugins on unsupported platforms.

This commit is contained in:
Kovid Goyal 2010-06-16 15:31:05 -06:00
parent b0e140317b
commit 09b679bb40
3 changed files with 422 additions and 108 deletions

View File

@ -458,8 +458,10 @@ from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
from calibre.devices.kobo.driver import KOBO
from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon
from calibre.ebooks.metadata.douban import DoubanBooks
from calibre.library.catalog import CSV_XML, EPUB_MOBI
plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon, CSV_XML, EPUB_MOBI]
plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
DoubanBooks, CSV_XML, EPUB_MOBI]
plugins += [
ComicInput,
EPUBInput,

View File

@ -21,7 +21,7 @@ from calibre.utils.config import make_config_dir, Config, ConfigProxy, \
platform = 'linux'
if iswindows:
platform = 'windows'
if isosx:
elif isosx:
platform = 'osx'
from zipfile import ZipFile
@ -32,19 +32,25 @@ def _config():
c.add_opt('filetype_mapping', default={}, help=_('Mapping for filetype plugins'))
c.add_opt('plugin_customization', default={}, help=_('Local plugin customization'))
c.add_opt('disabled_plugins', default=set([]), help=_('Disabled plugins'))
c.add_opt('enabled_plugins', default=set([]), help=_('Enabled plugins'))
return ConfigProxy(c)
config = _config()
class InvalidPlugin(ValueError):
    '''
    Raised when a plugin cannot be used, e.g. no valid plugin is found in a
    zip file or a plugin fails to initialize.
    '''
class PluginNotFound(ValueError):
    '''
    Error type for a plugin that cannot be located.

    NOTE(review): no raise site is visible in this part of the file; the
    meaning is taken from the class name -- confirm against callers.
    '''
def load_plugin(path_to_zip_file):
def find_plugin(name):
    '''
    Return the first initialized plugin whose ``name`` attribute equals
    `name`, or None when no initialized plugin matches.
    '''
    for candidate in _initialized_plugins:
        if candidate.name != name:
            continue
        return candidate
    return None
def load_plugin(path_to_zip_file): # {{{
'''
Load plugin from zip file or raise InvalidPlugin error
@ -76,11 +82,123 @@ def load_plugin(path_to_zip_file):
raise InvalidPlugin(_('No valid plugin found in ')+path_to_zip_file)
_initialized_plugins = []
# }}}
# Enable/disable plugins {{{
def disable_plugin(plugin_or_name):
    '''
    Record a plugin as disabled in the persistent configuration.

    The name is added to the ``disabled_plugins`` setting and removed from
    the ``enabled_plugins`` setting.

    :param plugin_or_name: a plugin object (its ``name`` attribute is used)
        or the plugin name itself.
    :raises PluginNotFound: if no initialized plugin has that name.
    :raises ValueError: if the plugin's ``can_be_disabled`` attribute is
        false. (PluginNotFound is itself a ValueError subclass.)
    '''
    x = getattr(plugin_or_name, 'name', plugin_or_name)
    plugin = find_plugin(x)
    if plugin is None:
        # find_plugin() returns None for unknown names; without this check
        # the attribute access below dies with an unhelpful AttributeError.
        raise PluginNotFound('No plugin named %s found'%x)
    if not plugin.can_be_disabled:
        raise ValueError('Plugin %s cannot be disabled'%x)
    dp = config['disabled_plugins']
    dp.add(x)
    # Assign back so the config object sees the modified set.
    config['disabled_plugins'] = dp
    ep = config['enabled_plugins']
    if x in ep:
        ep.remove(x)
    config['enabled_plugins'] = ep
def enable_plugin(plugin_or_name):
    '''
    Record a plugin as enabled in the persistent configuration.

    The name is dropped from the ``disabled_plugins`` setting (if present)
    and added to the ``enabled_plugins`` setting.

    :param plugin_or_name: a plugin object (its ``name`` attribute is used)
        or the plugin name itself.
    '''
    name = getattr(plugin_or_name, 'name', plugin_or_name)

    disabled = config['disabled_plugins']
    disabled.discard(name)
    config['disabled_plugins'] = disabled

    enabled = config['enabled_plugins']
    enabled.add(name)
    config['enabled_plugins'] = enabled
# Names of plugins that count as disabled unless the user has explicitly
# enabled them (is_disabled() checks the 'enabled_plugins' setting first).
default_disabled_plugins = set([
        'Douban Books',
])
def is_disabled(plugin):
    '''
    Return True if `plugin` should be treated as disabled.

    An entry in the ``enabled_plugins`` setting always wins. Otherwise the
    plugin is disabled when its name is in the ``disabled_plugins`` setting
    or in ``default_disabled_plugins``.
    '''
    name = plugin.name
    if name in config['enabled_plugins']:
        return False
    if name in config['disabled_plugins']:
        return True
    return name in default_disabled_plugins
# }}}
# File type plugins {{{
_on_import = {}
_on_preprocess = {}
_on_postprocess = {}
def reread_filetype_plugins():
    '''
    Rebuild the module-level ``_on_import``, ``_on_preprocess`` and
    ``_on_postprocess`` tables from the initialized plugins.

    Each table maps a file type to the list of FileTypePlugin instances that
    declare that file type and have the corresponding ``on_*`` flag set.
    '''
    global _on_import
    global _on_preprocess
    global _on_postprocess
    _on_import, _on_preprocess, _on_postprocess = {}, {}, {}

    for plugin in _initialized_plugins:
        if not isinstance(plugin, FileTypePlugin):
            continue
        for ft in plugin.file_types:
            if plugin.on_import:
                _on_import.setdefault(ft, []).append(plugin)
            if plugin.on_preprocess:
                _on_preprocess.setdefault(ft, []).append(plugin)
            if plugin.on_postprocess:
                _on_postprocess.setdefault(ft, []).append(plugin)
def _run_filetype_plugins(path_to_file, ft=None, occasion='preprocess'):
occasion = {'import':_on_import, 'preprocess':_on_preprocess,
'postprocess':_on_postprocess}[occasion]
customization = config['plugin_customization']
if ft is None:
ft = os.path.splitext(path_to_file)[-1].lower().replace('.', '')
nfp = path_to_file
for plugin in occasion.get(ft, []):
if is_disabled(plugin):
continue
plugin.site_customization = customization.get(plugin.name, '')
with plugin:
try:
nfp = plugin.run(path_to_file)
if not nfp:
nfp = path_to_file
except:
print 'Running file type plugin %s failed with traceback:'%plugin.name
traceback.print_exc()
x = lambda j : os.path.normpath(os.path.normcase(j))
if occasion == 'postprocess' and x(nfp) != x(path_to_file):
shutil.copyfile(nfp, path_to_file)
nfp = path_to_file
return nfp
# Convenience entry points: _run_filetype_plugins with the occasion pre-bound.
run_plugins_on_import = functools.partial(_run_filetype_plugins,
                                          occasion='import')
run_plugins_on_preprocess = functools.partial(_run_filetype_plugins,
                                              occasion='preprocess')
run_plugins_on_postprocess = functools.partial(_run_filetype_plugins,
                                               occasion='postprocess')
# }}}
# Plugin customization {{{
def customize_plugin(plugin, custom):
    '''
    Store `custom`, with surrounding whitespace stripped, as the
    customization string for `plugin` in the ``plugin_customization``
    setting (keyed by the plugin's name).
    '''
    settings = config['plugin_customization']
    settings[plugin.name] = custom.strip()
    # Assign back so the config object sees the modified mapping.
    config['plugin_customization'] = settings
def plugin_customization(plugin):
    '''
    Return the stored customization string for `plugin`, or '' when the
    ``plugin_customization`` setting has no entry for its name.
    '''
    saved = config['plugin_customization']
    return saved.get(plugin.name, '')
# }}}
# Input/Output profiles {{{
def input_profiles():
for plugin in _initialized_plugins:
if isinstance(plugin, InputProfile):
@ -90,7 +208,9 @@ def output_profiles():
for plugin in _initialized_plugins:
if isinstance(plugin, OutputProfile):
yield plugin
# }}}
# Metadata sources {{{
def metadata_sources(metadata_type='basic', customize=True, isbndb_key=None):
for plugin in _initialized_plugins:
if isinstance(plugin, MetadataSource) and \
@ -117,31 +237,9 @@ def migrate_isbndb_key():
if key:
prefs.set('isbndb_com_key', '')
set_isbndb_key(key)
# }}}
def reread_filetype_plugins():
global _on_import
global _on_preprocess
global _on_postprocess
_on_import = {}
_on_preprocess = {}
_on_postprocess = {}
for plugin in _initialized_plugins:
if isinstance(plugin, FileTypePlugin):
for ft in plugin.file_types:
if plugin.on_import:
if not _on_import.has_key(ft):
_on_import[ft] = []
_on_import[ft].append(plugin)
if plugin.on_preprocess:
if not _on_preprocess.has_key(ft):
_on_preprocess[ft] = []
_on_preprocess[ft].append(plugin)
if plugin.on_postprocess:
if not _on_postprocess.has_key(ft):
_on_postprocess[ft] = []
_on_postprocess[ft].append(plugin)
# Metadata read/write {{{
_metadata_readers = {}
_metadata_writers = {}
def reread_metadata_plugins():
@ -233,51 +331,9 @@ def set_file_type_metadata(stream, mi, ftype):
print 'Failed to set metadata for', repr(getattr(mi, 'title', ''))
traceback.print_exc()
# }}}
def _run_filetype_plugins(path_to_file, ft=None, occasion='preprocess'):
occasion = {'import':_on_import, 'preprocess':_on_preprocess,
'postprocess':_on_postprocess}[occasion]
customization = config['plugin_customization']
if ft is None:
ft = os.path.splitext(path_to_file)[-1].lower().replace('.', '')
nfp = path_to_file
for plugin in occasion.get(ft, []):
if is_disabled(plugin):
continue
plugin.site_customization = customization.get(plugin.name, '')
with plugin:
try:
nfp = plugin.run(path_to_file)
if not nfp:
nfp = path_to_file
except:
print 'Running file type plugin %s failed with traceback:'%plugin.name
traceback.print_exc()
x = lambda j : os.path.normpath(os.path.normcase(j))
if occasion == 'postprocess' and x(nfp) != x(path_to_file):
shutil.copyfile(nfp, path_to_file)
nfp = path_to_file
return nfp
run_plugins_on_import = functools.partial(_run_filetype_plugins,
occasion='import')
run_plugins_on_preprocess = functools.partial(_run_filetype_plugins,
occasion='preprocess')
run_plugins_on_postprocess = functools.partial(_run_filetype_plugins,
occasion='postprocess')
def initialize_plugin(plugin, path_to_zip_file):
try:
p = plugin(path_to_zip_file)
p.initialize()
return p
except Exception:
print 'Failed to initialize plugin:', plugin.name, plugin.version
tb = traceback.format_exc()
raise InvalidPlugin((_('Initialization of plugin %s failed with traceback:')
%tb) + '\n'+tb)
# Add/remove plugins {{{
def add_plugin(path_to_zip_file):
make_config_dir()
@ -307,14 +363,9 @@ def remove_plugin(plugin_or_name):
initialize_plugins()
return removed
def is_disabled(plugin):
return plugin.name in config['disabled_plugins']
def find_plugin(name):
for plugin in _initialized_plugins:
if plugin.name == name:
return plugin
# }}}
# Input/Output format plugins {{{
def input_format_plugins():
for plugin in _initialized_plugins:
@ -364,6 +415,9 @@ def available_output_formats():
formats.add(plugin.file_type)
return formats
# }}}
# Catalog plugins {{{
def catalog_plugins():
for plugin in _initialized_plugins:
@ -383,27 +437,32 @@ def plugin_for_catalog_format(fmt):
if fmt.lower() in plugin.file_types:
return plugin
def device_plugins():
# }}}
def device_plugins(): # {{{
for plugin in _initialized_plugins:
if isinstance(plugin, DevicePlugin):
if not is_disabled(plugin):
yield plugin
if platform in plugin.supported_platforms:
yield plugin
# }}}
def disable_plugin(plugin_or_name):
x = getattr(plugin_or_name, 'name', plugin_or_name)
plugin = find_plugin(x)
if not plugin.can_be_disabled:
raise ValueError('Plugin %s cannot be disabled'%x)
dp = config['disabled_plugins']
dp.add(x)
config['disabled_plugins'] = dp
def enable_plugin(plugin_or_name):
x = getattr(plugin_or_name, 'name', plugin_or_name)
dp = config['disabled_plugins']
if x in dp:
dp.remove(x)
config['disabled_plugins'] = dp
# Initialize plugins {{{
_initialized_plugins = []
def initialize_plugin(plugin, path_to_zip_file):
try:
p = plugin(path_to_zip_file)
p.initialize()
return p
except Exception:
print 'Failed to initialize plugin:', plugin.name, plugin.version
tb = traceback.format_exc()
raise InvalidPlugin((_('Initialization of plugin %s failed with traceback:')
%tb) + '\n'+tb)
def initialize_plugins():
global _initialized_plugins
@ -425,10 +484,14 @@ def initialize_plugins():
initialize_plugins()
def intialized_plugins():
def initialized_plugins():
    '''
    Generator yielding every plugin in ``_initialized_plugins``, in order.
    '''
    for loaded in _initialized_plugins:
        yield loaded
# }}}
# CLI {{{
def option_parser():
parser = OptionParser(usage=_('''\
%prog options
@ -449,17 +512,6 @@ def option_parser():
help=_('Disable the named plugin'))
return parser
def initialized_plugins():
return _initialized_plugins
def customize_plugin(plugin, custom):
d = config['plugin_customization']
d[plugin.name] = custom.strip()
config['plugin_customization'] = d
def plugin_customization(plugin):
return config['plugin_customization'].get(plugin.name, '')
def main(args=sys.argv):
parser = option_parser()
if len(args) < 2:
@ -504,3 +556,5 @@ def main(args=sys.argv):
if __name__ == '__main__':
sys.exit(main())
# }}}

View File

@ -0,0 +1,258 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>; 2010, Li Fanxi <lifanxi@freemindworld.com>'
__docformat__ = 'restructuredtext en'
import sys, textwrap
import traceback
from urllib import urlencode
from functools import partial
from lxml import etree
from calibre import browser, preferred_encoding
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.config import OptionParser
from calibre.ebooks.metadata.fetch import MetadataSource
from calibre.utils.date import parse_date, utcnow
# Optional API key; when not None it is appended to request URLs as the
# ``apikey`` query parameter.
DOUBAN_API_KEY = None

# Prefix -> namespace URI map shared by all XPath expressions below.
NAMESPACES = {
    'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
    'atom' : 'http://www.w3.org/2005/Atom',
    'db': 'http://www.douban.com/xmlns/'
}
# etree.XPath with the namespace map pre-bound.
XPath = partial(etree.XPath, namespaces=NAMESPACES)

# Feed-level paging information.
total_results = XPath('//openSearch:totalResults')
start_index = XPath('//openSearch:startIndex')
items_per_page = XPath('//openSearch:itemsPerPage')

# All entries in a feed, and the per-entry fields evaluated relative to one
# entry element.
entry = XPath('//atom:entry')
entry_id = XPath('descendant::atom:id')
title = XPath('descendant::atom:title')
description = XPath('descendant::atom:summary')
publisher = XPath("descendant::db:attribute[@name='publisher']")
isbn = XPath("descendant::db:attribute[@name='isbn13']")
date = XPath("descendant::db:attribute[@name='pubdate']")
creator = XPath("descendant::db:attribute[@name='author']")
tag = XPath("descendant::db:tag")
class DoubanBooks(MetadataSource):
name = 'Douban Books'
description = _('Downloads metadata from Douban.com')
supported_platforms = ['windows', 'osx', 'linux'] # Platforms this plugin will run on
author = 'Li Fanxi <lifanxi@freemindworld.com>' # The author of this plugin
version = (1, 0, 0) # The version number of this plugin
def fetch(self):
try:
self.results = search(self.title, self.book_author, self.publisher,
self.isbn, max_results=10,
verbose=self.verbose)
except Exception, e:
self.exception = e
self.tb = traceback.format_exc()
def report(verbose):
    '''
    Print the traceback of the exception currently being handled, but only
    when `verbose` is true. Always returns None.
    '''
    if not verbose:
        return
    import traceback
    traceback.print_exc()
class Query(object):
SEARCH_URL = 'http://api.douban.com/book/subjects?'
ISBN_URL = 'http://api.douban.com/book/subject/isbn/'
type = "search"
def __init__(self, title=None, author=None, publisher=None, isbn=None,
max_results=20, start_index=1):
assert not(title is None and author is None and publisher is None and \
isbn is None)
assert (int(max_results) < 21)
q = ''
if isbn is not None:
q = isbn
self.type = 'isbn'
else:
def build_term(parts):
return ' '.join(x for x in parts)
if title is not None:
q += build_term(title.split())
if author is not None:
q += (' ' if q else '') + build_term(author.split())
if publisher is not None:
q += (' ' if q else '') + build_term(publisher.split())
self.type = 'search'
if isinstance(q, unicode):
q = q.encode('utf-8')
if self.type == "isbn":
self.url = self.ISBN_URL + q
if DOUBAN_API_KEY is not None:
self.url = self.url + "?apikey=" + DOUBAN_API_KEY
else:
self.url = self.SEARCH_URL+urlencode({
'q':q,
'max-results':max_results,
'start-index':start_index,
})
if DOUBAN_API_KEY is not None:
self.url = self.url + "&apikey=" + DOUBAN_API_KEY
def __call__(self, browser, verbose):
if verbose:
print 'Query:', self.url
if self.type == "search":
feed = etree.fromstring(browser.open(self.url).read())
total = int(total_results(feed)[0].text)
start = int(start_index(feed)[0].text)
entries = entry(feed)
new_start = start + len(entries)
if new_start > total:
new_start = 0
return entries, new_start
elif self.type == "isbn":
feed = etree.fromstring(browser.open(self.url).read())
entries = entry(feed)
return entries, 0
class ResultList(list):
def get_description(self, entry, verbose):
try:
desc = description(entry)
if desc:
return 'SUMMARY:\n'+desc[0].text
except:
report(verbose)
def get_title(self, entry):
candidates = [x.text for x in title(entry)]
return ': '.join(candidates)
def get_authors(self, entry):
m = creator(entry)
if not m:
m = []
m = [x.text for x in m]
return m
def get_tags(self, entry, verbose):
try:
btags = [x.attrib["name"] for x in tag(entry)]
tags = []
for t in btags:
tags.extend([y.strip() for y in t.split('/')])
tags = list(sorted(list(set(tags))))
except:
report(verbose)
tags = []
return [x.replace(',', ';') for x in tags]
def get_publisher(self, entry, verbose):
try:
pub = publisher(entry)[0].text
except:
pub = None
return pub
def get_isbn(self, entry, verbose):
try:
isbn13 = isbn(entry)[0].text
except Exception:
isbn13 = None
return isbn13
def get_date(self, entry, verbose):
try:
d = date(entry)
if d:
default = utcnow().replace(day=15)
d = parse_date(d[0].text, assume_utc=True, default=default)
else:
d = None
except:
report(verbose)
d = None
return d
def populate(self, entries, browser, verbose=False):
for x in entries:
try:
id_url = entry_id(x)[0].text
title = self.get_title(x)
except:
report(verbose)
mi = MetaInformation(title, self.get_authors(x))
try:
if DOUBAN_API_KEY is not None:
id_url = id_url + "?apikey=" + DOUBAN_API_KEY
raw = browser.open(id_url).read()
feed = etree.fromstring(raw)
x = entry(feed)[0]
except Exception, e:
if verbose:
print 'Failed to get all details for an entry'
print e
mi.comments = self.get_description(x, verbose)
mi.tags = self.get_tags(x, verbose)
mi.isbn = self.get_isbn(x, verbose)
mi.publisher = self.get_publisher(x, verbose)
mi.pubdate = self.get_date(x, verbose)
self.append(mi)
def search(title=None, author=None, publisher=None, isbn=None,
           verbose=False, max_results=40):
    '''
    Search Douban and return a ResultList of at most `max_results` books.

    :param title, author, publisher, isbn: search terms; at least one must
        be given. If `isbn` is given the other terms are ignored.
    :param verbose: when true, queries and errors are printed.
    :param max_results: maximum number of books to return.
    :raises AssertionError: from Query, when no search term is given.
    '''
    max_results = int(max_results)
    # Query refuses page sizes above 20, so ask for at most that many per
    # request and keep paging until enough entries have been collected.
    page_size = min(max_results, 20)
    br = browser()
    start, entries = 1, []
    # Strict comparison: once max_results entries are in hand another
    # request would only fetch entries that are discarded below.
    while start > 0 and len(entries) < max_results:
        new, start = Query(title=title, author=author, publisher=publisher,
                           isbn=isbn, max_results=page_size,
                           start_index=start)(br, verbose)
        if not new:
            break
        entries.extend(new)

    entries = entries[:max_results]

    ans = ResultList()
    ans.populate(entries, br, verbose)
    return ans
def option_parser():
    '''
    Build the command line option parser for the Douban metadata fetcher.
    '''
    usage = textwrap.dedent(
        '''\
        %prog [options]
        Fetch book metadata from Douban. You must specify one of title, author,
        publisher or ISBN. If you specify ISBN the others are ignored. Will
        fetch a maximum of 100 matches, so you should make your query as
        specific as possible.
        '''
    )
    parser = OptionParser(usage)
    add = parser.add_option
    add('-t', '--title', help='Book title')
    add('-a', '--author', help='Book author(s)')
    add('-p', '--publisher', help='Book publisher')
    add('-i', '--isbn', help='Book ISBN')
    add('-m', '--max-results', default=10,
        help='Maximum number of results to fetch')
    add('-v', '--verbose', default=0, action='count',
        help='Be more verbose about errors')
    return parser
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
try:
results = search(opts.title, opts.author, opts.publisher, opts.isbn,
verbose=opts.verbose, max_results=int(opts.max_results))
except AssertionError:
report(True)
parser.print_help()
return 1
for result in results:
print unicode(result).encode(preferred_encoding)
print
if __name__ == '__main__':
sys.exit(main())