Merge from main branch

This commit is contained in:
Tom Scholl 2011-04-06 23:00:16 +00:00
commit b3413d8226
26 changed files with 644 additions and 144 deletions

View File

@ -1,5 +1,5 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.kommersant.ru
'''
@ -20,7 +20,13 @@ class Kommersant_ru(BasicNewsRecipe):
language = 'ru'
publication_type = 'newspaper'
masthead_url = 'http://www.kommersant.ru/CorpPics/logo_daily_1.gif'
extra_css = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: Arial, sans1, sans-serif} span#ctl00_ContentPlaceHolderStyle_LabelSubTitle{margin-bottom: 1em; display: block} .author{margin-bottom: 1em; display: block} .paragraph{margin-bottom: 1em; display: block} .vvodka{font-weight: bold; margin-bottom: 1em} '
extra_css = """
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
body{font-family: Tahoma, Arial, Helvetica, sans1, sans-serif}
.title{font-size: x-large; font-weight: bold; margin-bottom: 1em}
.subtitle{font-size: large; margin-bottom: 1em}
.document_vvodka{font-weight: bold; margin-bottom: 1em}
"""
conversion_options = {
'comment' : description
@ -29,14 +35,11 @@ class Kommersant_ru(BasicNewsRecipe):
, 'language' : language
}
keep_only_tags = [
dict(attrs={'id':'ctl00_ContentPlaceHolderStyle_PanelHeader'})
,dict(attrs={'class':['vvodka','paragraph','author']})
]
remove_tags = [dict(name=['iframe','object','link','img','base'])]
keep_only_tags = [dict(attrs={'class':['document','document_vvodka','document_text','document_authors vblock']})]
remove_tags = [dict(name=['iframe','object','link','img','base','meta'])]
feeds = [(u'Articles', u'http://feeds.kommersant.ru/RSS_Export/RU/daily.xml')]
def print_version(self, url):
return url.replace('doc-rss.aspx','doc.aspx') + '&print=true'
def print_version(self, url):
return url.replace('/doc-rss/','/Doc/') + '/Print'

View File

@ -217,14 +217,25 @@ def filename_to_utf8(name):
return name.decode(codec, 'replace').encode('utf8')
def extract(path, dir):
ext = os.path.splitext(path)[1][1:].lower()
extractor = None
if ext in ['zip', 'cbz', 'epub', 'oebzip']:
from calibre.libunzip import extract as zipextract
extractor = zipextract
elif ext in ['cbr', 'rar']:
# First use the file header to identify its type
with open(path, 'rb') as f:
id_ = f.read(3)
if id_ == b'Rar':
from calibre.libunrar import extract as rarextract
extractor = rarextract
elif id_.startswith(b'PK'):
from calibre.libunzip import extract as zipextract
extractor = zipextract
if extractor is None:
# Fallback to file extension
ext = os.path.splitext(path)[1][1:].lower()
if ext in ['zip', 'cbz', 'epub', 'oebzip']:
from calibre.libunzip import extract as zipextract
extractor = zipextract
elif ext in ['cbr', 'rar']:
from calibre.libunrar import extract as rarextract
extractor = rarextract
if extractor is None:
raise Exception('Unknown archive type')
extractor(path, dir)

View File

@ -10,6 +10,7 @@ from calibre.constants import numeric_version
from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata
from calibre.ebooks.metadata.opf2 import metadata_to_opf
from calibre.ebooks.oeb.base import OEB_IMAGES
from calibre.utils.config import test_eight_code
# To archive plugins {{{
class HTML2ZIP(FileTypePlugin):
@ -166,6 +167,14 @@ class ComicMetadataReader(MetadataReaderPlugin):
description = _('Extract cover from comic files')
def get_metadata(self, stream, ftype):
    # Prefer the file's magic bytes over the caller-supplied type: comic
    # archives are frequently mislabelled (a .cbr that is really a zip and
    # vice versa). Peek at the first three bytes and restore the position.
    if hasattr(stream, 'seek') and hasattr(stream, 'tell'):
        pos = stream.tell()
        id_ = stream.read(3)
        stream.seek(pos)
        if id_ == b'Rar':
            ftype = 'cbr'
        elif id_.startswith(b'PK'):
            # Was ``id.startswith`` -- that referenced the builtin ``id``
            # and raised AttributeError for every non-RAR stream.
            ftype = 'cbz'
    if ftype == 'cbr':
        from calibre.libunrar import extract_first_alphabetically as extract_first
        extract_first
@ -604,20 +613,34 @@ from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
from calibre.devices.kobo.driver import KOBO
from calibre.devices.bambook.driver import BAMBOOK
from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
KentDistrictLibrary
from calibre.ebooks.metadata.douban import DoubanBooks
from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
AmazonCovers, DoubanCovers
from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX
from calibre.ebooks.epub.fix.unmanifested import Unmanifested
from calibre.ebooks.epub.fix.epubcheck import Epubcheck
plugins = [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
KentDistrictLibrary, DoubanBooks, NiceBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
Epubcheck, OpenLibraryCovers, AmazonCovers, DoubanCovers,
NiceBooksCovers]
plugins = [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
Epubcheck, ]
if test_eight_code:
# New metadata download plugins {{{
from calibre.ebooks.metadata.sources.google import GoogleBooks
from calibre.ebooks.metadata.sources.amazon import Amazon
from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
plugins += [GoogleBooks, Amazon, OpenLibrary]
# }}}
else:
from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
KentDistrictLibrary
from calibre.ebooks.metadata.douban import DoubanBooks
from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
AmazonCovers, DoubanCovers
plugins += [GoogleBooks, ISBNDB, Amazon,
OpenLibraryCovers, AmazonCovers, DoubanCovers,
NiceBooksCovers, KentDistrictLibrary, DoubanBooks, NiceBooks]
plugins += [
ComicInput,
EPUBInput,
@ -1055,11 +1078,4 @@ plugins += [LookAndFeel, Behavior, Columns, Toolbar, Search, InputOptions,
#}}}
# New metadata download plugins {{{
from calibre.ebooks.metadata.sources.google import GoogleBooks
from calibre.ebooks.metadata.sources.amazon import Amazon
from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
plugins += [GoogleBooks, Amazon, OpenLibrary]
# }}}

View File

@ -36,7 +36,9 @@ class ANDROID(USBMS):
# Motorola
0x22b8 : { 0x41d9 : [0x216], 0x2d61 : [0x100], 0x2d67 : [0x100],
0x41db : [0x216], 0x4285 : [0x216], 0x42a3 : [0x216],
0x4286 : [0x216], 0x42b3 : [0x216], 0x42b4 : [0x216] },
0x4286 : [0x216], 0x42b3 : [0x216], 0x42b4 : [0x216],
0x7086 : [0x0226],
},
# Sony Ericsson
0xfce : { 0xd12e : [0x0100]},
@ -101,7 +103,8 @@ class ANDROID(USBMS):
'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE',
'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD',
'7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2']
'7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2',
'MB860']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT', '7']

View File

@ -12,10 +12,13 @@ Transform OEB content into a single (more or less) HTML file.
import os
from urlparse import urlparse
from functools import partial
from lxml import html
from urlparse import urldefrag
from calibre import prepare_string_for_xml
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace,\
OEB_IMAGES, XLINK, rewrite_links
from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.utils.logging import default_log
@ -40,6 +43,8 @@ class OEB2HTML(object):
self.opts = opts
self.links = {}
self.images = {}
self.base_hrefs = [item.href for item in oeb_book.spine]
self.map_resources(oeb_book)
return self.mlize_spine(oeb_book)
@ -47,6 +52,8 @@ class OEB2HTML(object):
output = [u'<html><body><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" /></head>']
for item in oeb_book.spine:
self.log.debug('Converting %s to HTML...' % item.href)
self.rewrite_ids(item.data, item)
rewrite_links(item.data, partial(self.rewrite_link, page=item))
stylizer = Stylizer(item.data, item.href, oeb_book, self.opts)
output += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
output.append('\n\n')
@ -56,43 +63,61 @@ class OEB2HTML(object):
def dump_text(self, elem, stylizer, page):
raise NotImplementedError
def get_link_id(self, href, aid):
aid = '%s#%s' % (href, aid)
if aid not in self.links:
self.links[aid] = 'calibre_link-%s' % len(self.links.keys())
return self.links[aid]
def get_link_id(self, href, id=''):
    '''
    Return the in-document anchor (``#calibre_link-N``) assigned to
    ``href`` (optionally with fragment ``id``), allocating the next free
    number the first time a given target is seen.
    '''
    key = href + '#%s' % id if id else href
    # The default is computed before insertion, so N is the number of
    # targets registered so far.
    return self.links.setdefault(key, '#calibre_link-%s' % len(self.links.keys()))
def rewrite_link(self, tag, attribs, page):
# Rewrite ids.
if 'id' in attribs:
attribs['id'] = self.get_link_id(page.href, attribs['id'])
# Rewrite links.
if tag == 'a' and 'href' in attribs:
href = page.abshref(attribs['href'])
if self.url_is_relative(href):
id = ''
if '#' in href:
href, n, id = href.partition('#')
href = '#%s' % self.get_link_id(href, id)
attribs['href'] = href
return attribs
def rewrite_image(self, tag, attribs, page):
if tag == 'img':
src = attribs.get('src', None)
if src:
src = page.abshref(src)
if src not in self.images:
ext = os.path.splitext(src)[1]
def map_resources(self, oeb_book):
for item in oeb_book.manifest:
if item.media_type in OEB_IMAGES:
if item.href not in self.images:
ext = os.path.splitext(item.href)[1]
fname = '%s%s' % (len(self.images), ext)
fname = fname.zfill(10)
self.images[src] = fname
attribs['src'] = 'images/%s' % self.images[src]
return attribs
self.images[item.href] = fname
if item in oeb_book.spine:
self.get_link_id(item.href)
root = item.data.find(XHTML('body'))
link_attrs = set(html.defs.link_attrs)
link_attrs.add(XLINK('href'))
for el in root.iter():
attribs = el.attrib
try:
if not isinstance(el.tag, basestring):
continue
except:
continue
for attr in attribs:
if attr in link_attrs:
href = item.abshref(attribs[attr])
href, id = urldefrag(href)
if href in self.base_hrefs:
self.get_link_id(href, id)
def url_is_relative(self, url):
    ' True when ``url`` carries no scheme (http:, mailto:, ...) '
    return not urlparse(url).scheme
def rewrite_link(self, url, page=None):
    '''
    Map ``url`` (as found in ``page``) to its location in the generated
    output: known images go to ``images/<name>``, known link targets to
    their assigned anchor, anything else is returned unchanged.
    '''
    if not page:
        return url
    target = page.abshref(url)
    # Images take precedence over link anchors
    if target in self.images:
        return 'images/%s' % self.images[target]
    return self.links[target] if target in self.links else url
def rewrite_ids(self, root, page):
    '''
    Replace every ``id`` attribute under ``root`` with the anchor name
    assigned by :meth:`get_link_id` for ``page``. The body element always
    receives the id that stands for the page itself.
    '''
    for node in root.iter():
        try:
            tag = node.tag
        except UnicodeDecodeError:
            continue
        attrs = node.attrib
        if tag == XHTML('body'):
            # [1:] drops the leading '#' of the stored anchor
            attrs['id'] = self.get_link_id(page.href)[1:]
        elif 'id' in attrs:
            attrs['id'] = self.get_link_id(page.href, attrs['id'])[1:]
def get_css(self, oeb_book):
css = u''
@ -130,12 +155,8 @@ class OEB2HTMLNoCSSizer(OEB2HTML):
tag = barename(elem.tag)
attribs = elem.attrib
attribs = self.rewrite_link(tag, attribs, page)
attribs = self.rewrite_image(tag, attribs, page)
if tag == 'body':
tag = 'div'
attribs['id'] = self.get_link_id(page.href, '')
tags.append(tag)
# Ignore anything that is set to not be displayed.
@ -218,13 +239,9 @@ class OEB2HTMLInlineCSSizer(OEB2HTML):
tag = barename(elem.tag)
attribs = elem.attrib
attribs = self.rewrite_link(tag, attribs, page)
attribs = self.rewrite_image(tag, attribs, page)
style_a = '%s' % style
if tag == 'body':
tag = 'div'
attribs['id'] = self.get_link_id(page.href, '')
if not style['page-break-before'] == 'always':
style_a = 'page-break-before: always;' + ' ' if style_a else '' + style_a
tags.append(tag)
@ -279,6 +296,8 @@ class OEB2HTMLClassCSSizer(OEB2HTML):
output = []
for item in oeb_book.spine:
self.log.debug('Converting %s to HTML...' % item.href)
self.rewrite_ids(item.data, item)
rewrite_links(item.data, partial(self.rewrite_link, page=item))
stylizer = Stylizer(item.data, item.href, oeb_book, self.opts)
output += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
output.append('\n\n')
@ -306,17 +325,12 @@ class OEB2HTMLClassCSSizer(OEB2HTML):
# Setup our variables.
text = ['']
#style = stylizer.style(elem)
tags = []
tag = barename(elem.tag)
attribs = elem.attrib
attribs = self.rewrite_link(tag, attribs, page)
attribs = self.rewrite_image(tag, attribs, page)
if tag == 'body':
tag = 'div'
attribs['id'] = self.get_link_id(page.href, '')
tags.append(tag)
# Remove attributes we won't want.

View File

@ -125,7 +125,10 @@ class Metadata(object):
_data = object.__getattribute__(self, '_data')
if field in TOP_LEVEL_IDENTIFIERS:
field, val = self._clean_identifier(field, val)
_data['identifiers'].update({field: val})
identifiers = _data['identifiers']
identifiers.pop(field, None)
if val:
identifiers[field] = val
elif field == 'identifiers':
if not val:
val = copy.copy(NULL_VALUES.get('identifiers', None))
@ -224,8 +227,7 @@ class Metadata(object):
identifiers = object.__getattribute__(self,
'_data')['identifiers']
if not val and typ in identifiers:
identifiers.pop(typ)
identifiers.pop(typ, None)
if val:
identifiers[typ] = val
@ -647,7 +649,7 @@ class Metadata(object):
fmt('Tags', u', '.join([unicode(t) for t in self.tags]))
if self.series:
fmt('Series', self.series + ' #%s'%self.format_series_index())
if self.language:
if not self.is_null('language'):
fmt('Language', self.language)
if self.rating is not None:
fmt('Rating', self.rating)

View File

@ -279,7 +279,7 @@ class Worker(Thread): # Get details {{{
class Amazon(Source):
name = 'Amazon'
name = 'Amazon Metadata'
description = _('Downloads metadata from Amazon')
capabilities = frozenset(['identify', 'cover'])
@ -493,9 +493,10 @@ class Amazon(Source):
if abort.is_set():
return
br = self.browser
log('Downloading cover from:', cached_url)
try:
cdata = br.open_novisit(cached_url, timeout=timeout).read()
result_queue.put(cdata)
result_queue.put((self, cdata))
except:
log.exception('Failed to download cover from:', cached_url)
# }}}

View File

@ -15,6 +15,7 @@ from calibre.customize import Plugin
from calibre.utils.logging import ThreadSafeLog, FileStream
from calibre.utils.config import JSONConfig
from calibre.utils.titlecase import titlecase
from calibre.utils.icu import capitalize, lower
from calibre.ebooks.metadata import check_isbn
msprefs = JSONConfig('metadata_sources/global.json')
@ -22,6 +23,12 @@ msprefs.defaults['txt_comments'] = False
msprefs.defaults['ignore_fields'] = []
msprefs.defaults['max_tags'] = 20
msprefs.defaults['wait_after_first_identify_result'] = 30 # seconds
msprefs.defaults['wait_after_first_cover_result'] = 60 # seconds
# Google covers are often poor quality (scans/errors) but they have high
# resolution, so they trump covers from better sources. So make sure they
# are only used if no other covers are found.
msprefs.defaults['cover_priorities'] = {'Google':2}
def create_log(ostream=None):
log = ThreadSafeLog(level=ThreadSafeLog.DEBUG)
@ -101,6 +108,30 @@ def get_cached_cover_urls(mi):
if url:
yield (p, url)
def cap_author_token(token):
    '''
    Capitalize a single whitespace-delimited piece of an author name.

    Name particles (von, de, el, van, le) are returned lowercased, runs of
    dotted initials are spaced out and everything else is capitalized.
    '''
    lowered = lower(token)
    if lowered in ('von', 'de', 'el', 'van', 'le'):
        return lowered
    if re.match(r'([a-z]\.){2,}$', lowered) is None:
        return capitalize(token)
    # Normalize tokens of the form J.K. to J. K.
    initials = [capitalize(piece) for piece in token.split('.')]
    return '. '.join(initials).strip()
def fixauthors(authors):
    '''
    Return a new list with every token of every author name passed through
    :func:`cap_author_token`. Falsy input is returned unchanged.
    '''
    if not authors:
        return authors
    return [' '.join(cap_author_token(tok) for tok in name.split())
            for name in authors]
def fixcase(x):
    ' Title-case ``x``; falsy values (None, empty string) pass through '
    return titlecase(x) if x else x
class Source(Plugin):
@ -253,13 +284,9 @@ class Source(Plugin):
before putting the Metadata object into result_queue. You can of
course, use a custom algorithm suited to your metadata source.
'''
def fixcase(x):
if x:
x = titlecase(x)
return x
if mi.title:
mi.title = fixcase(mi.title)
mi.authors = list(map(fixcase, mi.authors))
mi.authors = fixauthors(mi.authors)
mi.tags = list(map(fixcase, mi.tags))
mi.isbn = check_isbn(mi.isbn)
@ -340,7 +367,8 @@ class Source(Plugin):
title=None, authors=None, identifiers={}, timeout=30):
'''
Download a cover and put it into result_queue. The parameters all have
the same meaning as for :meth:`identify`.
the same meaning as for :meth:`identify`. Put (self, cover_data) into
result_queue.
This method should use cached cover URLs for efficiency whenever
possible. When cached data is not present, most plugins simply call

View File

@ -0,0 +1,105 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import sys, textwrap
from io import BytesIO
from threading import Event
from calibre import prints
from calibre.utils.config import OptionParser
from calibre.utils.magick.draw import save_cover_data_to
from calibre.ebooks.metadata import string_to_authors
from calibre.ebooks.metadata.opf2 import metadata_to_opf
from calibre.ebooks.metadata.sources.base import create_log
from calibre.ebooks.metadata.sources.identify import identify
from calibre.ebooks.metadata.sources.covers import download_cover
from calibre.utils.config import test_eight_code
def option_parser():
    '''
    Build the command line parser for the metadata fetcher. When
    ``test_eight_code`` is off, the parser from the old
    ``calibre.ebooks.metadata.fetch`` module is returned instead.
    '''
    if not test_eight_code:
        from calibre.ebooks.metadata.fetch import option_parser as legacy_parser
        return legacy_parser()
    usage = textwrap.dedent(
        '''\
        %prog [options]
        Fetch book metadata from online sources. You must specify at least one
        of title, authors or ISBN.
        '''
    )
    parser = OptionParser(usage)
    add = parser.add_option
    add('-t', '--title', help='Book title')
    add('-a', '--authors', help='Book author(s)')
    add('-i', '--isbn', help='Book ISBN')
    add('-v', '--verbose', default=False, action='store_true',
        help='Print the log to the console (stderr)')
    add('-o', '--opf', help='Output the metadata in OPF format')
    add('-c', '--cover',
        help='Specify a filename. The cover, if available, will be saved to it')
    add('-d', '--timeout', default='30',
        help='Timeout in seconds. Default is 30')
    return parser
def main(args=sys.argv):
    '''
    Command line entry point: identify a book from the given title/authors/
    ISBN, optionally download its cover, and print the best match.

    Delegates to the old ``calibre.ebooks.metadata.fetch.main`` when
    ``test_eight_code`` is off. Raises ``SystemExit(1)`` when no result is
    found, otherwise returns 0.
    '''
    if not test_eight_code:
        from calibre.ebooks.metadata.fetch import main as legacy_main
        return legacy_main(args)
    parser = option_parser()
    opts, args = parser.parse_args(args)
    timeout = int(opts.timeout)

    # All plugin logging is captured in ``buf`` and only shown on request
    buf = BytesIO()
    log = create_log(buf)
    abort = Event()

    authors = []
    if opts.authors:
        authors = string_to_authors(opts.authors)

    identifiers = {}
    if opts.isbn:
        identifiers['isbn'] = opts.isbn

    results = identify(log, abort, title=opts.title, authors=authors,
            identifiers=identifiers, timeout=timeout)

    if not results:
        # Print the captured log text. Previously the logger object itself
        # was printed here, which shows its repr instead of the messages.
        print (buf.getvalue(), file=sys.stderr)
        prints('No results found', file=sys.stderr)
        raise SystemExit(1)
    result = results[0]

    cf = None
    if opts.cover:
        cover = download_cover(log, title=opts.title, authors=authors,
                identifiers=result.identifiers, timeout=timeout)
        if cover is None:
            prints('No cover found', file=sys.stderr)
        else:
            # The last element of the cover tuple is the image data
            save_cover_data_to(cover[-1], opts.cover)
            result.cover = cf = opts.cover

    log = buf.getvalue()

    result = (metadata_to_opf(result) if opts.opf else
            unicode(result).encode('utf-8'))

    if opts.verbose:
        print (log, file=sys.stderr)

    print (result)
    if not opts.opf and opts.cover:
        prints('Cover :', cf)

    return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -0,0 +1,178 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import time
from Queue import Queue, Empty
from threading import Thread, Event
from io import BytesIO
from calibre.customize.ui import metadata_plugins
from calibre.ebooks.metadata.sources.base import msprefs, create_log
from calibre.utils.magick.draw import Image, save_cover_data_to
class Worker(Thread):
    '''
    Daemon thread that runs one plugin's ``download_cover`` and records how
    long it took. Each worker logs into its own in-memory buffer
    (``self.buf``).
    '''

    def __init__(self, plugin, abort, title, authors, identifiers, timeout, rq):
        Thread.__init__(self)
        self.daemon = True

        self.plugin = plugin
        self.abort = abort
        self.rq = rq
        self.timeout = timeout
        self.title = title
        self.authors = authors
        self.identifiers = identifiers

        self.buf = BytesIO()
        self.log = create_log(self.buf)
        # Stays None until run() has finished
        self.time_spent = None

    def run(self):
        began = time.time()
        if not self.abort.is_set():
            try:
                self.plugin.download_cover(self.log, self.rq, self.abort,
                        title=self.title, authors=self.authors,
                        identifiers=self.identifiers, timeout=self.timeout)
            except:
                self.log.exception('Failed to download cover from',
                        self.plugin.name)
        self.time_spent = time.time() - began
def is_worker_alive(workers):
    ' True if at least one of the given threads is still running '
    return any(w.is_alive() for w in workers)
def process_result(log, result):
    '''
    Validate and normalize one raw ``(plugin, data)`` item from a cover
    plugin. Returns ``(plugin, width, height, fmt, data)`` or None if the
    image cannot be loaded or is smaller than 50px in either dimension.
    '''
    plugin, raw = result
    try:
        img = Image()
        img.load(raw)
        img.trim(10)
        width, height = img.size
        fmt = img.format

        if min(width, height) < 50:
            raise ValueError('Image too small')
        cover = save_cover_data_to(img, '/cover.jpg', return_data=True)
    except:
        log.exception('Invalid cover from', plugin.name)
        return None
    else:
        return (plugin, width, height, fmt, cover)
def run_download(log, results, abort,
        title=None, authors=None, identifiers={}, timeout=30):
    '''
    Run the cover download, putting results into the queue :param:`results`.
    Each result is a tuple of the form:

        (plugin, width, height, fmt, bytes)

    One :class:`Worker` thread is started per cover-capable plugin. Once the
    first valid cover has arrived, remaining plugins get at most
    ``msprefs['wait_after_first_cover_result']`` seconds before ``abort``
    is set. A per-plugin summary is written to ``log`` at the end.
    '''
    plugins = list(metadata_plugins(['cover']))

    rq = Queue()
    workers = [Worker(p, abort, title, authors, identifiers, timeout, rq) for p
            in plugins]
    for w in workers:
        w.start()

    first_result_at = None
    wait_time = msprefs['wait_after_first_cover_result']
    found_results = {}

    def handle(raw):
        # Validate one raw plugin result; publish it and report success
        result = process_result(log, raw)
        if result is None:
            return False
        results.put(result)
        found_results[result[0]] = result
        return True

    while True:
        time.sleep(0.1)
        try:
            raw = rq.get_nowait()
        except Empty:
            pass
        else:
            # Start the grace-period clock on the first valid cover. The
            # test used to be ``is not None``, so the clock never started
            # and the wait_time cut-off below could never trigger.
            if handle(raw) and first_result_at is None:
                first_result_at = time.time()

        if not is_worker_alive(workers):
            break

        if first_result_at is not None and time.time() - first_result_at > wait_time:
            log('Not waiting for any more results')
            abort.set()

        if abort.is_set():
            break

    # Pick up anything the workers queued before we stopped polling
    while True:
        try:
            handle(rq.get_nowait())
        except Empty:
            break

    for w in workers:
        wlog = w.buf.getvalue().strip()
        log('\n'+'*'*30, w.plugin.name, 'Covers', '*'*30)
        log('Request extra headers:', w.plugin.browser.addheaders)
        if w.plugin in found_results:
            result = found_results[w.plugin]
            log('Downloaded cover:', '%dx%d'%(result[1], result[2]))
        else:
            log('Failed to download valid cover')
        if w.time_spent is None:
            # run() never reached its end, i.e. the worker was cut short
            log('Download aborted')
        else:
            log('Took', w.time_spent, 'seconds')
        if wlog:
            log(wlog)
        log('\n'+'*'*80)
def download_cover(log,
        title=None, authors=None, identifiers={}, timeout=30):
    '''
    Synchronous cover download. Returns the "best" cover as per user
    prefs/cover resolution.

    Return cover is a tuple: (plugin, width, height, fmt, data)

    Returns None if no cover is found.
    '''
    collected = Queue()
    abort = Event()

    run_download(log, collected, abort, title=title, authors=authors,
            identifiers=identifiers, timeout=timeout)

    covers = []
    while True:
        try:
            covers.append(collected.get_nowait())
        except Empty:
            break

    priorities = msprefs['cover_priorities']

    def rank(cover):
        # Lower plugin priority number wins; ties go to the larger image
        plugin, width, height, fmt, data = cover
        return (priorities.get(plugin.name, 1), 1/(width*height))

    if not covers:
        return None
    # min() yields the first minimal element, same as a stable sort's [0]
    return min(covers, key=rank)

View File

@ -145,15 +145,18 @@ def to_metadata(browser, log, entry_, timeout): # {{{
log.exception('Failed to parse rating')
# Cover
mi.has_google_cover = len(extra.xpath(
'//*[@rel="http://schemas.google.com/books/2008/thumbnail"]')) > 0
mi.has_google_cover = None
for x in extra.xpath(
'//*[@href and @rel="http://schemas.google.com/books/2008/thumbnail"]'):
mi.has_google_cover = x.get('href')
break
return mi
# }}}
class GoogleBooks(Source):
name = 'Google Books'
name = 'Google'
description = _('Downloads metadata from Google Books')
capabilities = frozenset(['identify', 'cover'])
@ -213,7 +216,7 @@ class GoogleBooks(Source):
results.sort(key=self.identify_results_keygen(
title=title, authors=authors, identifiers=identifiers))
for mi in results:
cached_url = self.cover_url_from_identifiers(mi.identifiers)
cached_url = self.get_cached_cover_url(mi.identifiers)
if cached_url is not None:
break
if cached_url is None:
@ -223,9 +226,10 @@ class GoogleBooks(Source):
if abort.is_set():
return
br = self.browser
log('Downloading cover from:', cached_url)
try:
cdata = br.open_novisit(cached_url, timeout=timeout).read()
result_queue.put(cdata)
result_queue.put((self, cdata))
except:
log.exception('Failed to download cover from:', cached_url)
@ -254,9 +258,9 @@ class GoogleBooks(Source):
goog = ans.identifiers['google']
for isbn in getattr(ans, 'all_isbns', []):
self.cache_isbn_to_identifier(isbn, goog)
if ans.has_google_cover:
self.cache_identifier_to_cover_url(goog,
self.GOOGLE_COVER%goog)
if ans.has_google_cover:
self.cache_identifier_to_cover_url(goog,
self.GOOGLE_COVER%goog)
self.clean_downloaded_metadata(ans)
result_queue.put(ans)
except:
@ -271,6 +275,9 @@ class GoogleBooks(Source):
identifiers={}, timeout=30):
query = self.create_query(log, title=title, authors=authors,
identifiers=identifiers)
if not query:
log.error('Insufficient metadata to construct query')
return
br = self.browser
try:
raw = br.open_novisit(query, timeout=timeout).read()

View File

@ -20,6 +20,7 @@ from calibre.ebooks.metadata.xisbn import xisbn
from calibre.ebooks.metadata.book.base import Metadata
from calibre.utils.date import utc_tz
from calibre.utils.html2text import html2text
from calibre.utils.icu import lower
# Download worker {{{
class Worker(Thread):
@ -97,11 +98,45 @@ class ISBNMerge(object):
if has_isbn_result:
self.merge_isbn_results()
else:
self.results = sorted(self.isbnless_results,
results = sorted(self.isbnless_results,
key=attrgetter('relevance_in_source'))
# Pick only the most relevant result from each source
self.results = []
seen = set()
for result in results:
if result.identify_plugin not in seen:
seen.add(result.identify_plugin)
self.results.append(result)
result.average_source_relevance = \
result.relevance_in_source
self.merge_metadata_results()
return self.results
def merge_metadata_results(self):
    '''
    Collapse entries of ``self.results`` that share the same lowercased
    title and authors into one merged result whose relevance is the mean
    of the group. Does nothing when every result is already unique.
    '''
    groups = {}
    for res in self.results:
        title = lower(res.title if res.title else '')
        key = (title, tuple([lower(a) for a in res.authors]))
        groups.setdefault(key, []).append(res)

    if len(groups) == len(self.results):
        # No duplicates, keep the existing ordering
        return

    self.results = []
    for members in groups.itervalues():
        rel = [m.average_source_relevance for m in members]
        if len(members) > 1:
            merged = self.merge(members, None, do_asr=False)
            merged.average_source_relevance = sum(rel)/len(rel)
        else:
            merged = members[0]
        self.results.append(merged)

    self.results.sort(key=attrgetter('average_source_relevance'))
def merge_isbn_results(self):
self.results = []
for min_year, results in self.pools.itervalues():
@ -122,7 +157,7 @@ class ISBNMerge(object):
values = [getattr(x, attr) for x in results if not x.is_null(attr)]
return values[0] if values else null_value
def merge(self, results, min_year):
def merge(self, results, min_year, do_asr=True):
ans = Metadata(_('Unknown'))
# We assume the shortest title has the least cruft in it
@ -185,7 +220,8 @@ class ISBNMerge(object):
# Merge any other fields with no special handling (random merge)
touched_fields = set()
for r in results:
touched_fields |= r.identify_plugin.touched_fields
if hasattr(r, 'identify_plugin'):
touched_fields |= r.identify_plugin.touched_fields
for f in touched_fields:
if f.startswith('identifier:') or not ans.is_null(f):
@ -193,9 +229,10 @@ class ISBNMerge(object):
setattr(ans, f, self.random_merge(f, results,
null_value=getattr(ans, f)))
avg = [x.relevance_in_source for x in results]
avg = sum(avg)/len(avg)
ans.average_source_relevance = avg
if do_asr:
avg = [x.relevance_in_source for x in results]
avg = sum(avg)/len(avg)
ans.average_source_relevance = avg
return ans
@ -210,7 +247,8 @@ def merge_identify_results(result_map, log):
# }}}
def identify(log, abort, title=None, authors=None, identifiers={}, timeout=30):
def identify(log, abort, # {{{
title=None, authors=None, identifiers={}, timeout=30):
start_time = time.time()
plugins = list(metadata_plugins(['identify']))
@ -322,6 +360,7 @@ def identify(log, abort, title=None, authors=None, identifiers={}, timeout=30):
r.tags = r.tags[:max_tags]
return results
# }}}
if __name__ == '__main__': # tests {{{
# To run these test use: calibre-debug -e
@ -354,10 +393,10 @@ if __name__ == '__main__': # tests {{{
exact=True), authors_test(['Dan Brown'])]
),
( # No specific problems
{'identifiers':{'isbn': '0743273567'}},
[title_test('The great gatsby', exact=True),
authors_test(['Francis Scott Fitzgerald'])]
( # No ISBN
{'title':'Justine', 'authors':['Durrel']},
[title_test('Justine', exact=True),
authors_test(['Lawrence Durrel'])]
),
( # A newer book

View File

@ -26,7 +26,7 @@ class OpenLibrary(Source):
br = self.browser
try:
ans = br.open_novisit(self.OPENLIBRARY%isbn, timeout=timeout).read()
result_queue.put(ans)
result_queue.put((self, ans))
except Exception as e:
if callable(getattr(e, 'getcode', None)) and e.getcode() == 404:
log.error('No cover for ISBN: %r found'%isbn)

View File

@ -17,6 +17,7 @@ from calibre.gui2.dialogs.confirm_delete import confirm
from calibre.gui2.dialogs.tag_list_editor import TagListEditor
from calibre.gui2.actions import InterfaceAction
from calibre.utils.icu import sort_key
from calibre.utils.config import test_eight_code
class EditMetadataAction(InterfaceAction):
@ -133,8 +134,6 @@ class EditMetadataAction(InterfaceAction):
row_list = [r.row() for r in rows]
current_row = 0
changed = set([])
db = self.gui.library_view.model().db
if len(row_list) == 1:
cr = row_list[0]
@ -142,6 +141,24 @@ class EditMetadataAction(InterfaceAction):
list(range(self.gui.library_view.model().rowCount(QModelIndex())))
current_row = row_list.index(cr)
if test_eight_code:
changed = self.do_edit_metadata(row_list, current_row)
else:
changed = self.do_edit_metadata_old(row_list, current_row)
if changed:
self.gui.library_view.model().refresh_ids(list(changed))
current = self.gui.library_view.currentIndex()
m = self.gui.library_view.model()
if self.gui.cover_flow:
self.gui.cover_flow.dataChanged()
m.current_changed(current, previous)
self.gui.tags_view.recount()
def do_edit_metadata_old(self, row_list, current_row):
changed = set([])
db = self.gui.library_view.model().db
while True:
prev = next_ = None
if current_row > 0:
@ -167,15 +184,28 @@ class EditMetadataAction(InterfaceAction):
self.gui.library_view.set_current_row(current_row)
self.gui.library_view.scroll_to_row(current_row)
def do_edit_metadata(self, row_list, current_row):
    '''
    Open the new single-book metadata dialog for ``row_list`` starting at
    ``current_row`` and return the ``changed`` value it reports.
    '''
    from calibre.gui2.metadata.single import edit_metadata
    library_db = self.gui.library_view.model().db
    outcome = edit_metadata(library_db, row_list, current_row,
            parent=self.gui, view_slot=self.view_format_callback,
            set_current_callback=self.set_current_callback)
    # Second element (rows to refresh) is not used by this caller
    changed, rows_to_refresh = outcome
    return changed
def set_current_callback(self, id_):
    ' Select and scroll to the library view row that holds book ``id_`` '
    row = self.gui.library_view.model().db.row(id_)
    self.gui.library_view.set_current_row(row)
    self.gui.library_view.scroll_to_row(row)
def view_format_callback(self, id_, fmt):
    '''
    Open a format in the viewer. With ``id_`` None, ``fmt`` is handed to
    the View action's ``_view_file``; otherwise the book's row is looked
    up and ``fmt`` is passed to ``view_format``.
    '''
    viewer = self.gui.iactions['View']
    if id_ is not None:
        library_db = self.gui.library_view.model().db
        viewer.view_format(library_db.row(id_), fmt)
        return
    viewer._view_file(fmt)
if changed:
self.gui.library_view.model().refresh_ids(list(changed))
current = self.gui.library_view.currentIndex()
m = self.gui.library_view.model()
if self.gui.cover_flow:
self.gui.cover_flow.dataChanged()
m.current_changed(current, previous)
self.gui.tags_view.recount()
def edit_bulk_metadata(self, checked):
'''

View File

@ -428,7 +428,7 @@ class Format(QListWidgetItem): # {{{
if timestamp is not None:
ts = timestamp.astimezone(local_tz)
t = strftime('%a, %d %b %Y [%H:%M:%S]', ts.timetuple())
text = _('Last modified: %s')%t
text = _('Last modified: %s\n\nDouble click to view')%t
self.setToolTip(text)
self.setStatusTip(text)
@ -577,8 +577,7 @@ class FormatsManager(QWidget): # {{{
self.changed = True
def show_format(self, item, *args):
fmt = item.ext
self.dialog.view_format.emit(fmt)
self.dialog.do_view_format(item.path, item.ext)
def get_selected_format_metadata(self, db, id_):
old = prefs['read_file_metadata']

View File

@ -26,15 +26,15 @@ from calibre.utils.config import tweaks
class MetadataSingleDialogBase(ResizableDialog):
view_format = pyqtSignal(object)
view_format = pyqtSignal(object, object)
cc_two_column = tweaks['metadata_single_use_2_cols_for_custom_fields']
one_line_comments_toolbar = False
def __init__(self, db, parent=None):
self.db = db
self.changed = set([])
self.books_to_refresh = set([])
self.rows_to_refresh = set([])
self.changed = set()
self.books_to_refresh = set()
self.rows_to_refresh = set()
ResizableDialog.__init__(self, parent)
def setupUi(self, *args): # {{{
@ -194,6 +194,13 @@ class MetadataSingleDialogBase(ResizableDialog):
pass # Do something
# }}}
def do_view_format(self, path, fmt):
    """Emit ``view_format`` for either a file path or a format of this book.

    :param path: when truthy, the signal is emitted as ``(None, path)``.
    :param fmt: used only when ``path`` is falsy; the signal is then
        emitted as ``(self.book_id, fmt)``.
    """
    if not path:
        self.view_format.emit(self.book_id, fmt)
        return
    # A leading None tells the receiver the second argument is a path.
    self.view_format.emit(None, path)
def do_layout(self):
raise NotImplementedError()
@ -204,6 +211,8 @@ class MetadataSingleDialogBase(ResizableDialog):
widget.initialize(self.db, id_)
for widget in getattr(self, 'custom_metadata_widgets', []):
widget.initialize(id_)
if callable(self.set_current_callback):
self.set_current_callback(id_)
# Commented out as it doesn't play nice with Next, Prev buttons
#self.fetch_metadata_button.setFocus(Qt.OtherFocusReason)
@ -339,11 +348,13 @@ class MetadataSingleDialogBase(ResizableDialog):
gprefs['metasingle_window_geometry3'] = bytearray(self.saveGeometry())
# Dialog use methods {{{
def start(self, row_list, current_row, view_slot=None):
def start(self, row_list, current_row, view_slot=None,
set_current_callback=None):
self.row_list = row_list
self.current_row = current_row
if view_slot is not None:
self.view_format.connect(view_slot)
self.set_current_callback = set_current_callback
self.do_one(apply_changes=False)
ret = self.exec_()
self.break_cycles()
@ -375,6 +386,7 @@ class MetadataSingleDialogBase(ResizableDialog):
def break_cycles(self):
# Break any reference cycles that could prevent python
# from garbage collecting this dialog
self.set_current_callback = self.db = None
def disconnect(signal):
try:
signal.disconnect()
@ -643,9 +655,11 @@ class MetadataSingleDialogAlt(MetadataSingleDialogBase): # {{{
# }}}
def edit_metadata(db, row_list, current_row, parent=None, view_slot=None):
def edit_metadata(db, row_list, current_row, parent=None, view_slot=None,
set_current_callback=None):
d = MetadataSingleDialog(db, parent)
d.start(row_list, current_row, view_slot=view_slot)
d.start(row_list, current_row, view_slot=view_slot,
set_current_callback=set_current_callback)
return d.changed, d.rows_to_refresh
if __name__ == '__main__':

View File

@ -0,0 +1,39 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from PyQt4.Qt import (QStyledItemDelegate, QTextDocument, QRectF,
QStyle, QApplication)
class RichTextDelegate(QStyledItemDelegate): # {{{
    """Item delegate that draws an index's data as an HTML document."""

    def __init__(self, parent=None):
        QStyledItemDelegate.__init__(self, parent)

    def to_doc(self, index):
        """Return a new QTextDocument whose HTML is the index's data string."""
        document = QTextDocument()
        html = index.data().toString()
        document.setHtml(html)
        return document

    def sizeHint(self, option, index):
        """Return the size of the index's document with 10 added to its height."""
        hint = self.to_doc(index).size().toSize()
        hint.setHeight(hint.height() + 10)
        return hint

    def paint(self, painter, option, index):
        """Draw the item background, then the index's document, clipped to the item rect."""
        painter.save()
        painter.setClipRect(QRectF(option.rect))
        if hasattr(QStyle, 'CE_ItemViewItem'):
            # Let the application style draw the item when it offers this element.
            style = QApplication.style()
            style.drawControl(QStyle.CE_ItemViewItem, option, painter)
        elif option.state & QStyle.State_Selected:
            # Otherwise only a selected item gets a highlight fill.
            painter.fillRect(option.rect, option.palette.highlight())
        # Shift the origin so the document is drawn at the item's top-left corner.
        painter.translate(option.rect.topLeft())
        document = self.to_doc(index)
        document.drawContents(painter)
        painter.restore()
# }}}

View File

@ -985,6 +985,7 @@ class TagsModel(QAbstractItemModel): # {{{
def do_drop_from_library(self, md, action, row, column, parent):
idx = parent
if idx.isValid():
self.tags_view.setCurrentIndex(idx)
node = self.data(idx, Qt.UserRole)
if node.type == TagTreeItem.TAG:
fm = self.db.metadata_for_field(node.tag.category)

View File

@ -24,6 +24,8 @@ from calibre.library.server.xml import XMLServer
from calibre.library.server.opds import OPDSServer
from calibre.library.server.cache import Cache
from calibre.library.server.browse import BrowseServer
from calibre.utils.search_query_parser import saved_searches
from calibre import prints
class DispatchController(object): # {{{
@ -178,7 +180,12 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache,
def set_search_restriction(self, restriction):
self.search_restriction_name = restriction
if restriction:
self.search_restriction = 'search:"%s"'%restriction
if restriction not in saved_searches().names():
prints('WARNING: Content server: search restriction ',
restriction, ' does not exist')
self.search_restriction = ''
else:
self.search_restriction = 'search:"%s"'%restriction
else:
self.search_restriction = ''
self.reset_caches()

View File

@ -30,7 +30,7 @@ entry_points = {
'calibre-customize = calibre.customize.ui:main',
'calibre-complete = calibre.utils.complete:main',
'pdfmanipulate = calibre.ebooks.pdf.manipulate.cli:main',
'fetch-ebook-metadata = calibre.ebooks.metadata.fetch:main',
'fetch-ebook-metadata = calibre.ebooks.metadata.sources.cli:main',
'epub-fix = calibre.ebooks.epub.fix.main:main',
'calibre-smtp = calibre.utils.smtp:main',
],
@ -183,7 +183,7 @@ class PostInstall:
from calibre.ebooks.lrf.lrfparser import option_parser as lrf2lrsop
from calibre.gui2.lrf_renderer.main import option_parser as lrfviewerop
from calibre.gui2.viewer.main import option_parser as viewer_op
from calibre.ebooks.metadata.fetch import option_parser as fem_op
from calibre.ebooks.metadata.sources.cli import option_parser as fem_op
from calibre.gui2.main import option_parser as guiop
from calibre.utils.smtp import option_parser as smtp_op
from calibre.library.server.main import option_parser as serv_op

View File

@ -126,7 +126,7 @@ html_use_modindex = False
html_use_index = False
# If true, the reST sources are included in the HTML build as _sources/<name>.
html_copy_source = False
html_copy_source = True
# Output file base name for HTML help builder.
htmlhelp_basename = 'calibredoc'

View File

@ -99,7 +99,8 @@ We just need some information from you:
device.
Once you send us the output for a particular operating system, support for the device in that operating system
will appear in the next release of |app|.
will appear in the next release of |app|. To send us the output, open a bug report and attach the output to it.
See `calibre bugs <http://calibre-ebook.com/bugs>`_.
My device is not being detected by |app|?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@ -71,7 +71,7 @@ Edit metadata
|emii| The :guilabel:`Edit metadata` action has seven variations, which can be accessed by clicking the down arrow on the right side of the button.
1. **Edit metadata individually**: This allows you to edit the metadata of books one-by-one, with the option of fetching metadata, including covers from the internet. It also allows you to add/remove particular ebook formats from a book. For more detail see :ref:`metadata`.
1. **Edit metadata individually**: This allows you to edit the metadata of books one-by-one, with the option of fetching metadata, including covers from the internet. It also allows you to add/remove particular ebook formats from a book.
2. **Edit metadata in bulk**: This allows you to edit common metadata fields for large numbers of books simultaneously. It operates on all the books you have selected in the :ref:`Library view <search_sort>`.
3. **Download metadata and covers**: Downloads metadata and covers (if available), for the books that are selected in the book list.
4. **Download only metadata**: Downloads only metadata (if available), for the books that are selected in the book list.
@ -79,6 +79,7 @@ Edit metadata
6. **Download only social metadata**: Downloads only social metadata such as tags and reviews (if available), for the books that are selected in the book list.
7. **Merge Book Records**: Gives you the capability of merging the metadata and formats of two or more book records together. You can choose to either delete or keep the records that were not clicked first.
For more details see :ref:`metadata`.
.. _convert_ebooks:

View File

@ -70,7 +70,7 @@ Customizing |app|'s e-book conversion
.. toctree::
:maxdepth: 2
viewer
conversion
Editing e-book metadata
^^^^^^^^^^^^^^^^^^^^^^^^^^^
@ -78,7 +78,7 @@ Editing e-book metadata
.. toctree::
:maxdepth: 2
viewer
metadata
Frequently Asked Questions
^^^^^^^^^^^^^^^^^^^^^^^^^^^

View File

@ -784,6 +784,7 @@ def write_tweaks(raw):
tweaks = read_tweaks()
test_eight_code = tweaks.get('test_eight_code', False)
def migrate():
if hasattr(os, 'geteuid') and os.geteuid() == 0:

View File

@ -92,12 +92,12 @@ def save_cover_data_to(data, path, bgcolor='#ffffff', resize_to=None,
ret = None
if return_data:
ret = data
if changed:
if changed or isinstance(ret, Image):
if hasattr(img, 'set_compression_quality') and fmt == 'jpg':
img.set_compression_quality(compression_quality)
ret = img.export(fmt)
else:
if changed:
if changed or isinstance(ret, Image):
if hasattr(img, 'set_compression_quality') and fmt == 'jpg':
img.set_compression_quality(compression_quality)
img.save(path)