mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 10:44:09 -04:00
IGN:Working command line interface to the Google Books Data API to fetch book metadata
This commit is contained in:
parent
2e5fe9a047
commit
524013c86c
244
src/calibre/ebooks/metadata/google_books.py
Normal file
244
src/calibre/ebooks/metadata/google_books.py
Normal file
@ -0,0 +1,244 @@
|
|||||||
|
from __future__ import with_statement
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import sys, textwrap
|
||||||
|
from urllib import urlencode
|
||||||
|
from functools import partial
|
||||||
|
|
||||||
|
from lxml import etree
|
||||||
|
from dateutil import parser
|
||||||
|
|
||||||
|
from calibre import browser, preferred_encoding
|
||||||
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
|
from calibre.utils.config import OptionParser
|
||||||
|
|
||||||
|
# Prefix -> namespace URI map; the prefixes are the ones usable inside the
# XPath expressions compiled below.
NAMESPACES = {
    'openSearch': 'http://a9.com/-/spec/opensearchrss/1.0/',
    'atom': 'http://www.w3.org/2005/Atom',
    'dc': 'http://purl.org/dc/terms',
}

# Factory for compiled XPath expressions with NAMESPACES already bound.
XPath = partial(etree.XPath, namespaces=NAMESPACES)

# Expressions that match anywhere in a parsed feed document.
total_results = XPath('//openSearch:totalResults')
start_index = XPath('//openSearch:startIndex')
items_per_page = XPath('//openSearch:itemsPerPage')
entry = XPath('//atom:entry')

# Expressions evaluated relative to a context node (they select among its
# descendants); in this module the context node is always a feed entry.
entry_id = XPath('descendant::atom:id')
creator = XPath('descendant::dc:creator')
identifier = XPath('descendant::dc:identifier')
title = XPath('descendant::dc:title')
date = XPath('descendant::dc:date')
publisher = XPath('descendant::dc:publisher')
subject = XPath('descendant::dc:subject')
description = XPath('descendant::dc:description')
language = XPath('descendant::dc:language')
|
||||||
|
|
||||||
|
def report(verbose):
    """Print the traceback of the current exception when *verbose* is true.

    Every caller in this module invokes it from inside an ``except`` block.
    When *verbose* is false nothing is printed and ``traceback`` is not even
    imported.
    """
    if not verbose:
        return
    import traceback
    traceback.print_exc()
|
||||||
|
|
||||||
|
|
||||||
|
class Query(object):
    """One page-sized request against the Google Books volumes feed.

    Constructing the object only builds :attr:`url`; calling the instance
    performs the HTTP request and parses the response.
    """

    BASE_URL = 'http://books.google.com/books/feeds/volumes?'

    def __init__(self, title=None, author=None, publisher=None, isbn=None,
            max_results=20, min_viewability='none', start_index=1):
        """Build the request URL.

        :param title: Title words; each whitespace separated word becomes an
                      ``intitle:`` term.
        :param author: Author words, turned into ``inauthor:`` terms.
        :param publisher: Publisher words, turned into ``inpublisher:`` terms.
        :param isbn: If given, the query is ``isbn:<isbn>`` and title, author
                     and publisher are ignored.
        :param max_results: Sent as ``max-results``; must be less than 21.
        :param min_viewability: One of ``'none'``, ``'partial'``, ``'full'``.
        :param start_index: Sent as ``start-index``.
        :raises AssertionError: If no search field is given or a parameter
                                is out of range (``main`` relies on this
                                exception type).
        """
        assert not(title is None and author is None and publisher is None and \
                isbn is None)
        assert (max_results < 21)
        assert (min_viewability in ('none', 'partial', 'full'))
        if isbn is not None:
            q = 'isbn:' + isbn
        else:
            def build_term(prefix, parts):
                return ' '.join('in' + prefix + ':' + x for x in parts)

            terms = []
            for prefix, value in (('title', title), ('author', author),
                                  ('publisher', publisher)):
                if value is not None:
                    terms.append(build_term(prefix, value.split()))
            # The groups must be separated from each other as well, otherwise
            # the last title word fuses with the first author term
            # ("intitle:fooinauthor:bar"). Empty groups are dropped so that no
            # stray separators appear.
            q = ' '.join(t for t in terms if t)

        self.url = self.BASE_URL + urlencode({
            'q': q,
            'max-results': max_results,
            'start-index': start_index,
            'min-viewability': min_viewability,
        })

    def __call__(self, browser, verbose):
        """Run the query and return ``(entries, new_start)``.

        :param browser: Object whose ``open(url).read()`` returns the raw feed.
        :param verbose: If true, the request URL is printed first.
        :return: The list of ``atom:entry`` elements in the response and the
                 start index for the following page; the latter is 0 when it
                 would exceed the total number of results reported by the
                 feed.
        """
        if verbose:
            # Single-argument form prints the same text under the Python 2
            # print statement and the print function.
            print('Query: %s' % self.url)
        feed = etree.fromstring(browser.open(self.url).read())
        total = int(total_results(feed)[0].text)
        start = int(start_index(feed)[0].text)
        entries = entry(feed)
        new_start = start + len(entries)
        if new_start > total:
            new_start = 0
        return entries, new_start
|
||||||
|
|
||||||
|
|
||||||
|
class ResultList(list):
    """A list of ``MetaInformation`` objects built from feed entries.

    The ``get_*`` helpers each extract one field from an ``atom:entry``
    element; :meth:`populate` drives them and appends the results to the
    list itself.
    """

    def get_description(self, entry, verbose):
        """Return the text of the first dc:description, or None."""
        try:
            desc = description(entry)
            if desc:
                return desc[0].text
        except Exception:
            report(verbose)
        return None

    def get_language(self, entry, verbose):
        """Return the text of the first dc:language, or None."""
        try:
            l = language(entry)
            if l:
                return l[0].text
        except Exception:
            report(verbose)
        return None

    def get_title(self, entry):
        """Return the longest dc:title text of the entry.

        Ties keep document order (the sort is stable). Raises IndexError
        when the entry has no usable title.
        """
        # Skip empty elements so len() is never applied to None.
        candidates = [x.text for x in title(entry) if x.text]
        candidates.sort(key=len, reverse=True)
        return candidates[0]

    def get_authors(self, entry):
        """Return the text of every dc:creator (possibly an empty list)."""
        return [x.text for x in creator(entry)]

    def get_author_sort(self, entry, verbose):
        """Return the first ``file-as`` attribute found on a dc:creator.

        Attribute names are matched by suffix, so a namespace qualified
        ``file-as`` matches too. Returns None when no creator carries one.
        """
        for x in creator(entry):
            for key, val in x.attrib.items():
                if key.endswith('file-as'):
                    return val
        return None

    def get_identifiers(self, entry, mi):
        """Set ``mi.isbn`` from the dc:identifier elements of the entry.

        Only identifiers prefixed with ``ISBN:`` (case-insensitive) are
        used; when several are present the longest wins and, among equally
        long ones, the last. ``mi`` is left untouched if none is found.
        """
        isbns = []
        for x in identifier(entry):
            # Do not go through str(): it fails on non-ASCII text under
            # Python 2 and turns a missing text node into 'None'.
            t = (x.text or '').strip()
            if t[:5].upper() == 'ISBN:':
                isbns.append(t[5:])
        if isbns:
            mi.isbn = sorted(isbns, key=len)[-1]

    def get_tags(self, entry, verbose):
        """Return the text of every dc:subject, or [] on error."""
        try:
            tags = [x.text for x in subject(entry)]
        except Exception:
            report(verbose)
            tags = []
        return tags

    def get_publisher(self, entry, verbose):
        """Return the text of the first dc:publisher, or None.

        Failures (including a simply absent publisher) are not reported,
        whatever the value of *verbose*.
        """
        try:
            pub = publisher(entry)[0].text
        except Exception:
            pub = None
        return pub

    def get_date(self, entry, verbose):
        """Return the first dc:date parsed with dateutil, or None."""
        try:
            d = date(entry)
            if d:
                d = parser.parse(d[0].text)
            else:
                d = None
        except Exception:
            report(verbose)
            d = None
        return d

    def populate(self, entries, browser, verbose=False):
        """Append one MetaInformation object per usable entry.

        For every entry the URL in its atom:id is fetched to obtain the
        detailed record; if that fails, the fields are read from the search
        result entry instead. Entries without an id or title are skipped.

        :param entries: Iterable of ``atom:entry`` elements.
        :param browser: Object whose ``open(url).read()`` returns raw XML.
        :param verbose: If true, failures are printed.
        """
        for x in entries:
            try:
                id_url = entry_id(x)[0].text
                book_title = self.get_title(x)
            except Exception:
                report(verbose)
                # Without an id and title nothing useful can be built;
                # carrying on would hit unbound names or silently reuse the
                # previous entry's values.
                continue
            mi = MetaInformation(book_title, self.get_authors(x))
            try:
                raw = browser.open(id_url).read()
                feed = etree.fromstring(raw)
                x = entry(feed)[0]
            except Exception:
                if verbose:
                    print('Failed to get all details for an entry')
                    # sys.exc_info() avoids both the "except X, e" and the
                    # "except X as e" spellings, so the code is valid on
                    # every Python version.
                    print(sys.exc_info()[1])
            mi.author_sort = self.get_author_sort(x, verbose)
            mi.comments = self.get_description(x, verbose)
            self.get_identifiers(x, mi)
            mi.tags = self.get_tags(x, verbose)
            mi.publisher = self.get_publisher(x, verbose)
            mi.timestamp = self.get_date(x, verbose)
            mi.language = self.get_language(x, verbose)
            self.append(mi)
|
||||||
|
|
||||||
|
|
||||||
|
def search(title=None, author=None, publisher=None, isbn=None,
        min_viewability='none', verbose=False, max_results=40):
    """Search Google Books and return a populated ResultList.

    Result pages are requested one after another until the feed reports no
    further page, a page comes back empty, or *max_results* entries have
    been collected.

    :param title, author, publisher, isbn: Search fields, see ``Query``.
    :param min_viewability: Passed through to ``Query``.
    :param verbose: If true, queries and errors are printed.
    :param max_results: Upper bound on the number of entries processed.
    :return: ``ResultList`` ordered by decreasing comment length, so the
             entries with the longest description come first.
    :raises AssertionError: From ``Query`` when no search field is given.
    """
    br = browser()
    start, entries = 1, []
    # Strict comparison: once max_results entries are available another
    # request would only fetch data that is thrown away below.
    while start > 0 and len(entries) < max_results:
        # start_index must follow the cursor returned by the previous page,
        # otherwise the first page is fetched again and again.
        new, start = Query(title=title, author=author, publisher=publisher,
                isbn=isbn, min_viewability=min_viewability,
                start_index=start)(br, verbose)
        if not new:
            break
        entries.extend(new)

    entries = entries[:max_results]

    ans = ResultList()
    ans.populate(entries, br, verbose)
    # Missing comments count as length 0. The sort is stable, so entries with
    # equally long comments keep the order in which the feed returned them.
    ans.sort(key=lambda mi: len(mi.comments or ''), reverse=True)
    return ans
|
||||||
|
|
||||||
|
def option_parser():
    """Return the command line option parser for the google-books tool.

    Options: ``--title``, ``--author``, ``--publisher``, ``--isbn``,
    ``--max-results`` (integer, default 10) and a countable ``--verbose``.
    """
    # NOTE(review): the usage text mentions a maximum of 100 matches, while
    # the actual limit is whatever --max-results is set to (default 10).
    # The text is left unchanged here.
    parser = OptionParser(textwrap.dedent(
        '''\
        %prog [options]

        Fetch book metadata from Google. You must specify one of title, author,
        publisher or ISBN. If you specify ISBN the others are ignored. Will
        fetch a maximum of 100 matches, so you should make your query as
        specific as possible.
        '''
    ))
    parser.add_option('-t', '--title', help='Book title')
    parser.add_option('-a', '--author', help='Book author(s)')
    parser.add_option('-p', '--publisher', help='Book publisher')
    parser.add_option('-i', '--isbn', help='Book ISBN')
    # type='int' is required: without it a value given on the command line
    # is stored as a string, while the default is an int.
    parser.add_option('-m', '--max-results', default=10, type='int',
                      help='Maximum number of results to fetch')
    parser.add_option('-v', '--verbose', default=0, action='count',
                      help='Be more verbose about errors')
    return parser
|
||||||
|
|
||||||
|
def main(args=sys.argv):
    """Command line entry point: run a search and print every result.

    :param args: Full argument vector, program name included.
    :return: 0 on success, 1 when the search parameters are rejected (the
             traceback and the usage help are printed in that case).
    """
    parser = option_parser()
    opts, args = parser.parse_args(args)
    try:
        # int() keeps the search working even if the option value reaches
        # this point as a string.
        results = search(opts.title, opts.author, opts.publisher, opts.isbn,
                verbose=opts.verbose, max_results=int(opts.max_results))
    except AssertionError:
        report(True)
        parser.print_help()
        return 1
    for result in results:
        # 'replace' prevents a UnicodeEncodeError when the metadata contains
        # characters the preferred encoding cannot represent.
        print(unicode(result).encode(preferred_encoding, 'replace'))
        print('')
    return 0
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Run as a script: the return value of main() becomes the exit status.
    raise SystemExit(main())
|
@ -93,6 +93,7 @@ class Main(MainWindow, Ui_MainWindow):
|
|||||||
self.viewers = collections.deque()
|
self.viewers = collections.deque()
|
||||||
self.content_server = None
|
self.content_server = None
|
||||||
self.system_tray_icon = QSystemTrayIcon(QIcon(':/library'), self)
|
self.system_tray_icon = QSystemTrayIcon(QIcon(':/library'), self)
|
||||||
|
self.system_tray_icon.setObjectName('calibre')
|
||||||
if not config['systray_icon']:
|
if not config['systray_icon']:
|
||||||
self.system_tray_icon.hide()
|
self.system_tray_icon.hide()
|
||||||
else:
|
else:
|
||||||
|
@ -62,13 +62,14 @@ entry_points = {
|
|||||||
'comic2lrf = calibre.ebooks.lrf.comic.convert_from:main',
|
'comic2lrf = calibre.ebooks.lrf.comic.convert_from:main',
|
||||||
'comic2epub = calibre.ebooks.epub.from_comic:main',
|
'comic2epub = calibre.ebooks.epub.from_comic:main',
|
||||||
'comic2mobi = calibre.ebooks.mobi.from_comic:main',
|
'comic2mobi = calibre.ebooks.mobi.from_comic:main',
|
||||||
'comic2pdf = calibre.ebooks.pdf.from_comic:main',
|
'comic2pdf = calibre.ebooks.pdf.from_comic:main',
|
||||||
'calibre-debug = calibre.debug:main',
|
'calibre-debug = calibre.debug:main',
|
||||||
'calibredb = calibre.library.cli:main',
|
'calibredb = calibre.library.cli:main',
|
||||||
'calibre-fontconfig = calibre.utils.fontconfig:main',
|
'calibre-fontconfig = calibre.utils.fontconfig:main',
|
||||||
'calibre-parallel = calibre.parallel:main',
|
'calibre-parallel = calibre.parallel:main',
|
||||||
'calibre-customize = calibre.customize.ui:main',
|
'calibre-customize = calibre.customize.ui:main',
|
||||||
'pdftrim = calibre.ebooks.pdf.pdftrim:main' ,
|
'pdftrim = calibre.ebooks.pdf.pdftrim:main' ,
|
||||||
|
'google-books = calibre.ebooks.metadata.google_books:main',
|
||||||
],
|
],
|
||||||
'gui_scripts' : [
|
'gui_scripts' : [
|
||||||
__appname__+' = calibre.gui2.main:main',
|
__appname__+' = calibre.gui2.main:main',
|
||||||
@ -196,7 +197,8 @@ def setup_completion(fatal_errors):
|
|||||||
from calibre.ebooks.epub.from_comic import option_parser as comic2epub
|
from calibre.ebooks.epub.from_comic import option_parser as comic2epub
|
||||||
from calibre.ebooks.mobi.from_any import option_parser as any2mobi
|
from calibre.ebooks.mobi.from_any import option_parser as any2mobi
|
||||||
from calibre.ebooks.mobi.writer import option_parser as oeb2mobi
|
from calibre.ebooks.mobi.writer import option_parser as oeb2mobi
|
||||||
from calibre.gui2.main import option_parser as guiop
|
from calibre.gui2.main import option_parser as guiop
|
||||||
|
from calibre.ebooks.metadata.google_books import option_parser as gbop
|
||||||
any_formats = ['epub', 'htm', 'html', 'xhtml', 'xhtm', 'rar', 'zip',
|
any_formats = ['epub', 'htm', 'html', 'xhtml', 'xhtm', 'rar', 'zip',
|
||||||
'txt', 'lit', 'rtf', 'pdf', 'prc', 'mobi', 'fb2', 'odt']
|
'txt', 'lit', 'rtf', 'pdf', 'prc', 'mobi', 'fb2', 'odt']
|
||||||
f = open_file('/etc/bash_completion.d/libprs500')
|
f = open_file('/etc/bash_completion.d/libprs500')
|
||||||
@ -244,6 +246,7 @@ def setup_completion(fatal_errors):
|
|||||||
f.write(opts_and_words('feeds2lrf', feeds2lrf, feed_titles))
|
f.write(opts_and_words('feeds2lrf', feeds2lrf, feed_titles))
|
||||||
f.write(opts_and_words('feeds2epub', feeds2epub, feed_titles))
|
f.write(opts_and_words('feeds2epub', feeds2epub, feed_titles))
|
||||||
f.write(opts_and_words('feeds2mobi', feeds2mobi, feed_titles))
|
f.write(opts_and_words('feeds2mobi', feeds2mobi, feed_titles))
|
||||||
|
f.write(opts_and_words('google-books', gbop, []))
|
||||||
f.write(opts_and_exts('html2epub', html2epub, ['html', 'htm', 'xhtm', 'xhtml', 'opf']))
|
f.write(opts_and_exts('html2epub', html2epub, ['html', 'htm', 'xhtm', 'xhtml', 'opf']))
|
||||||
f.write(opts_and_exts('html2oeb', html2oeb, ['html', 'htm', 'xhtm', 'xhtml']))
|
f.write(opts_and_exts('html2oeb', html2oeb, ['html', 'htm', 'xhtm', 'xhtml']))
|
||||||
f.write(opts_and_exts('odt2oeb', odt2oeb, ['odt']))
|
f.write(opts_and_exts('odt2oeb', odt2oeb, ['odt']))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user