Merge from trunk

This commit is contained in:
Charles Haley 2011-04-19 08:56:49 +01:00
commit b9d90ec8e1
18 changed files with 778 additions and 123 deletions

View File

@ -8,23 +8,36 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net> edited by Huan T'
from calibre.web.feeds.news import BasicNewsRecipe
class Slashdot(BasicNewsRecipe):
title = u'Slashdot.org'
description = '''Tech news. WARNING: This recipe downloads a lot
of content and may result in your IP being banned from slashdot.org'''
oldest_article = 7
simultaneous_downloads = 1
delay = 3
max_articles_per_feed = 100
language = 'en'
title = u'Slashdot.org'
description = '''Tech news. WARNING: This recipe downloads a lot
of content and may result in your IP being banned from slashdot.org'''
oldest_article = 7
simultaneous_downloads = 1
delay = 3
max_articles_per_feed = 100
language = 'en'
__author__ = 'floweros edited by Huan T'
no_stylesheets = True
# keep_only_tags = [
# dict(name='div',attrs={'class':'article'}),
# dict(name='div',attrs={'class':'commentTop'}),
# ]
__author__ = 'floweros edited by Huan T'
no_stylesheets = True
keep_only_tags = [
dict(name='div',attrs={'id':'article'}),
dict(name='div',attrs={'class':['postBody' 'details']}),
dict(name='footer',attrs={'class':['clearfix meta article-foot']}),
dict(name='article',attrs={'class':['fhitem fhitem-story article usermode thumbs grid_24']}),
dict(name='dl',attrs={'class':'relatedPosts'}),
dict(name='h2',attrs={'class':'story'}),
dict(name='span',attrs={'class':'comments'}),
]
feeds = [
remove_tags = [
dict(name='aside',attrs={'id':'slashboxes'}),
dict(name='div',attrs={'class':'paginate'}),
dict(name='section',attrs={'id':'comments'}),
dict(name='span',attrs={'class':'topic'}),
]
feeds = [
(u'Slashdot',
u'http://rss.slashdot.org/Slashdot/slashdot'),
(u'/. IT',
@ -37,5 +50,3 @@ class Slashdot(BasicNewsRecipe):
u'http://rss.slashdot.org/Slashdot/slashdotYourRightsOnline')
]
def get_article_url(self, article):
return article.get('feedburner_origlink', None)

View File

@ -626,8 +626,9 @@ if test_eight_code:
from calibre.ebooks.metadata.sources.amazon import Amazon
from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
from calibre.ebooks.metadata.sources.isbndb import ISBNDB
from calibre.ebooks.metadata.sources.overdrive import OverDrive
plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB]
plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB, OverDrive]
# }}}
else:

View File

@ -108,10 +108,10 @@ class ANDROID(USBMS):
'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD',
'7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2',
'MB860', 'MULTI-CARD', 'MID7015A']
'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT', '7']
'A70S', 'A101IT', '7', 'INCREDIBLE']
OSX_MAIN_MEM = 'Android Device Main Memory'

View File

@ -399,7 +399,7 @@ class HTMLPreProcessor(object):
(re.compile(u'˙\s*(<br.*?>)*\s*Z', re.UNICODE), lambda match: u'Ż'),
# If pdf printed from a browser then the header/footer has a reliable pattern
(re.compile(r'((?<=</a>)\s*file:////?[A-Z].*<br>|file:////?[A-Z].*<br>(?=\s*<hr>))', re.IGNORECASE), lambda match: ''),
(re.compile(r'((?<=</a>)\s*file:/{2,4}[A-Z].*<br>|file:////?[A-Z].*<br>(?=\s*<hr>))', re.IGNORECASE), lambda match: ''),
# Center separator lines
(re.compile(u'<br>\s*(?P<break>([*#•✦=]+\s*)+)\s*<br>'), lambda match: '<p>\n<p style="text-align:center">' + match.group(1) + '</p>'),

View File

@ -764,6 +764,7 @@ class HeuristicProcessor(object):
# Multiple sequential blank paragraphs are merged with appropriate margins
# If non-blank scene breaks exist they are center aligned and styled with appropriate margins.
if getattr(self.extra_opts, 'format_scene_breaks', False):
html = re.sub('(?i)<div[^>]*>\s*<br(\s?/)?>\s*</div>', '<p></p>', html)
html = self.detect_whitespace(html)
html = self.detect_soft_breaks(html)
blanks_count = len(self.any_multi_blank.findall(html))

View File

@ -274,26 +274,34 @@ class Source(Plugin):
if authors:
# Leave ' in there for Irish names
pat = re.compile(r'[-,:;+!@#$%^&*(){}.`~"\s\[\]/]')
remove_pat = re.compile(r'[,!@#$%^&*(){}`~"\s\[\]/]')
replace_pat = re.compile(r'[-+.:;]')
if only_first_author:
authors = authors[:1]
for au in authors:
au = replace_pat.sub(' ', au)
parts = au.split()
if ',' in au:
# au probably in ln, fn form
parts = parts[1:] + parts[:1]
for tok in parts:
tok = pat.sub('', tok).strip()
tok = remove_pat.sub('', tok).strip()
if len(tok) > 2 and tok.lower() not in ('von', ):
yield tok
def get_title_tokens(self, title):
def get_title_tokens(self, title, strip_joiners=True, strip_subtitle=False):
'''
Take a title and return a list of tokens useful for an AND search query.
Excludes connectives and punctuation.
Excludes connectives(optionally) and punctuation.
'''
if title:
# strip sub-titles
if strip_subtitle:
subtitle = re.compile(r'([\(\[\{].*?[\)\]\}]|[/:\\].*$)')
if len(subtitle.sub('', title)) > 1:
title = subtitle.sub('', title)
title_patterns = [(re.compile(pat, re.IGNORECASE), repl) for pat, repl in
[
# Remove things like: (2010) (Omnibus) etc.
@ -305,17 +313,20 @@ class Source(Plugin):
(r'(\d+),(\d+)', r'\1\2'),
# Remove hyphens only if they have whitespace before them
(r'(\s-)', ' '),
# Remove single quotes
(r"'", ''),
# Remove single quotes not followed by 's'
(r"'(?!s)", ''),
# Replace other special chars with a space
(r'''[:,;+!@#$%^&*(){}.`~"\s\[\]/]''', ' ')
]]
for pat, repl in title_patterns:
title = pat.sub(repl, title)
tokens = title.split()
for token in tokens:
token = token.strip()
if token and token.lower() not in ('a', 'and', 'the'):
if token and (not strip_joiners or token.lower() not in ('a',
'and', 'the', '&')):
yield token
def split_jobs(self, jobs, num):
@ -363,7 +374,12 @@ class Source(Plugin):
def get_book_url(self, identifiers):
'''
Return the URL for the book identified by identifiers at this source.
If no URL is found, return None.
This URL must be browseable to by a human using a browser. It is meant
to provide a clickable link for the user to easily visit the books page
at this source.
If no URL is found, return None. This method must be quick, and
consistent, so only implement it if it is possible to construct the URL
from a known scheme given identifiers.
'''
return None

View File

@ -433,7 +433,7 @@ def urls_from_identifiers(identifiers): # {{{
pass
isbn = identifiers.get('isbn', None)
if isbn:
ans.append(('ISBN',
ans.append((isbn,
'http://www.worldcat.org/search?q=bn%%3A%s&qt=advanced'%isbn))
return ans
# }}}
@ -444,13 +444,18 @@ if __name__ == '__main__': # tests {{{
from calibre.ebooks.metadata.sources.test import (test_identify,
title_test, authors_test)
tests = [
(
{'title':'Magykal Papers',
'authors':['Sage']},
[title_test('The Magykal Papers', exact=True)],
),
( # An e-book ISBN not on Amazon, one of the authors is
# unknown to Amazon
{'identifiers':{'isbn': '9780307459671'},
'title':'Invisible Gorilla', 'authors':['Christopher Chabris']},
[title_test('The Invisible Gorilla',
exact=True), authors_test(['Christopher Chabris', 'Daniel Simons'])]
[title_test('The Invisible Gorilla', exact=True)]
),

View File

@ -0,0 +1,439 @@
#!/usr/bin/env python
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
Fetch metadata using Overdrive Content Reserve
'''
import re, random, mechanize, copy
from threading import RLock
from Queue import Queue, Empty
from lxml import html
from lxml.html import soupparser
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import Source
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.chardet import xml_to_unicode
from calibre.library.comments import sanitize_comments_html
ovrdrv_data_cache = {}
cover_url_cache = {}
cache_lock = RLock()
base_url = 'http://search.overdrive.com/'
class OverDrive(Source):
name = 'Overdrive'
description = _('Downloads metadata from Overdrive\'s Content Reserve')
capabilities = frozenset(['identify', 'cover'])
touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate',
'comments', 'publisher', 'identifier:isbn', 'series', 'series_index',
'language', 'identifier:overdrive'])
has_html_comments = True
supports_gzip_transfer_encoding = False
cached_cover_url_is_reliable = True
def __init__(self, *args, **kwargs):
Source.__init__(self, *args, **kwargs)
self.prefs.defaults['ignore_fields'] =['tags', 'pubdate', 'comments', 'identifier:isbn', 'language']
def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
identifiers={}, timeout=30):
ovrdrv_id = identifiers.get('overdrive', None)
isbn = identifiers.get('isbn', None)
br = self.browser
ovrdrv_data = self.to_ovrdrv_data(br, title, authors, ovrdrv_id)
if ovrdrv_data:
title = ovrdrv_data[8]
authors = ovrdrv_data[6]
mi = Metadata(title, authors)
self.parse_search_results(ovrdrv_data, mi)
if ovrdrv_id is None:
ovrdrv_id = ovrdrv_data[7]
if isbn is not None:
self.cache_isbn_to_identifier(isbn, ovrdrv_id)
self.get_book_detail(br, ovrdrv_data[1], mi, ovrdrv_id, log)
result_queue.put(mi)
return None
# }}}
def download_cover(self, log, result_queue, abort, # {{{
title=None, authors=None, identifiers={}, timeout=30):
cached_url = self.get_cached_cover_url(identifiers)
if cached_url is None:
log.info('No cached cover found, running identify')
rq = Queue()
self.identify(log, rq, abort, title=title, authors=authors,
identifiers=identifiers)
if abort.is_set():
return
results = []
while True:
try:
results.append(rq.get_nowait())
except Empty:
break
results.sort(key=self.identify_results_keygen(
title=title, authors=authors, identifiers=identifiers))
for mi in results:
cached_url = self.get_cached_cover_url(mi.identifiers)
if cached_url is not None:
break
if cached_url is None:
log.info('No cover found')
return
if abort.is_set():
return
ovrdrv_id = identifiers.get('overdrive', None)
br = self.browser
referer = self.get_base_referer()+'ContentDetails-Cover.htm?ID='+ovrdrv_id
req = mechanize.Request(cached_url)
req.add_header('referer', referer)
log('Downloading cover from:', cached_url)
try:
cdata = br.open_novisit(req, timeout=timeout).read()
result_queue.put((self, cdata))
except:
log.exception('Failed to download cover from:', cached_url)
# }}}
def get_cached_cover_url(self, identifiers): # {{{
url = None
ovrdrv_id = identifiers.get('overdrive', None)
if ovrdrv_id is None:
isbn = identifiers.get('isbn', None)
if isbn is not None:
ovrdrv_id = self.cached_isbn_to_identifier(isbn)
if ovrdrv_id is not None:
url = self.cached_identifier_to_cover_url(ovrdrv_id)
return url
# }}}
def get_base_referer(self): # to be used for passing referrer headers to cover download
choices = [
'http://overdrive.chipublib.org/82DC601D-7DDE-4212-B43A-09D821935B01/10/375/en/',
'http://emedia.clevnet.org/9D321DAD-EC0D-490D-BFD8-64AE2C96ECA8/10/241/en/',
'http://singapore.lib.overdrive.com/F11D55BE-A917-4D63-8111-318E88B29740/10/382/en/',
'http://ebooks.nypl.org/20E48048-A377-4520-BC43-F8729A42A424/10/257/en/',
'http://spl.lib.overdrive.com/5875E082-4CB2-4689-9426-8509F354AFEF/10/335/en/'
]
return choices[random.randint(0, len(choices)-1)]
def format_results(self, reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid):
fix_slashes = re.compile(r'\\/')
thumbimage = fix_slashes.sub('/', thumbimage)
worldcatlink = fix_slashes.sub('/', worldcatlink)
cover_url = re.sub('(?P<img>(Ima?g(eType-)?))200', '\g<img>100', thumbimage)
social_metadata_url = base_url+'TitleInfo.aspx?ReserveID='+reserveid+'&FormatID='+formatid
series_num = ''
if not series:
if subtitle:
title = od_title+': '+subtitle
else:
title = od_title
else:
title = od_title
m = re.search("([0-9]+$)", subtitle)
if m:
series_num = float(m.group(1))
return [cover_url, social_metadata_url, worldcatlink, series, series_num, publisher, creators, reserveid, title]
def safe_query(self, br, query_url, post=''):
'''
The query must be initialized by loading an empty search results page
this page attempts to set a cookie that Mechanize doesn't like
copy the cookiejar to a separate instance and make a one-off request with the temp cookiejar
'''
goodcookies = br._ua_handlers['_cookies'].cookiejar
clean_cj = mechanize.CookieJar()
cookies_to_copy = []
for cookie in goodcookies:
copied_cookie = copy.deepcopy(cookie)
cookies_to_copy.append(copied_cookie)
for copied_cookie in cookies_to_copy:
clean_cj.set_cookie(copied_cookie)
if post:
br.open_novisit(query_url, post)
else:
br.open_novisit(query_url)
br.set_cookiejar(clean_cj)
def overdrive_search(self, br, q, title, author):
# re-initialize the cookiejar to so that it's clean
clean_cj = mechanize.CookieJar()
br.set_cookiejar(clean_cj)
q_query = q+'default.aspx/SearchByKeyword'
q_init_search = q+'SearchResults.aspx'
# get first author as string - convert this to a proper cleanup function later
author_tokens = list(self.get_author_tokens(author,
only_first_author=True))
title_tokens = list(self.get_title_tokens(title,
strip_joiners=False, strip_subtitle=True))
if len(title_tokens) >= len(author_tokens):
initial_q = ' '.join(title_tokens)
xref_q = '+'.join(author_tokens)
else:
initial_q = ' '.join(author_tokens)
xref_q = '+'.join(title_tokens)
q_xref = q+'SearchResults.svc/GetResults?iDisplayLength=50&sSearch='+xref_q
query = '{"szKeyword":"'+initial_q+'"}'
# main query, requires specific Content Type header
req = mechanize.Request(q_query)
req.add_header('Content-Type', 'application/json; charset=utf-8')
br.open_novisit(req, query)
# initiate the search without messing up the cookiejar
self.safe_query(br, q_init_search)
# get the search results object
results = False
while results == False:
xreq = mechanize.Request(q_xref)
xreq.add_header('X-Requested-With', 'XMLHttpRequest')
xreq.add_header('Referer', q_init_search)
xreq.add_header('Accept', 'application/json, text/javascript, */*')
raw = br.open_novisit(xreq).read()
for m in re.finditer(ur'"iTotalDisplayRecords":(?P<displayrecords>\d+).*?"iTotalRecords":(?P<totalrecords>\d+)', raw):
if int(m.group('displayrecords')) >= 1:
results = True
elif int(m.group('totalrecords')) >= 1:
xref_q = ''
q_xref = q+'SearchResults.svc/GetResults?iDisplayLength=50&sSearch='+xref_q
elif int(m.group('totalrecords')) == 0:
return ''
return self.sort_ovrdrv_results(raw, title, title_tokens, author, author_tokens)
def sort_ovrdrv_results(self, raw, title=None, title_tokens=None, author=None, author_tokens=None, ovrdrv_id=None):
close_matches = []
raw = re.sub('.*?\[\[(?P<content>.*?)\]\].*', '[[\g<content>]]', raw)
results = eval(raw)
#print results
# The search results are either from a keyword search or a multi-format list from a single ID,
# sort through the results for closest match/format
if results:
for reserveid, od_title, subtitle, edition, series, publisher, format, formatid, creators, \
thumbimage, shortdescription, worldcatlink, excerptlink, creatorfile, sorttitle, \
availabletolibrary, availabletoretailer, relevancyrank, unknown1, unknown2, unknown3 in results:
#print "this record's title is "+od_title+", subtitle is "+subtitle+", author[s] are "+creators+", series is "+series
if ovrdrv_id is not None and int(formatid) in [1, 50, 410, 900]:
#print "overdrive id is not None, searching based on format type priority"
return self.format_results(reserveid, od_title, subtitle, series, publisher,
creators, thumbimage, worldcatlink, formatid)
else:
creators = creators.split(', ')
# if an exact match in a preferred format occurs
if (author and creators[0] == author[0]) and od_title == title and int(formatid) in [1, 50, 410, 900]:
return self.format_results(reserveid, od_title, subtitle, series, publisher,
creators, thumbimage, worldcatlink, formatid)
else:
close_title_match = False
close_author_match = False
for token in title_tokens:
if od_title.lower().find(token.lower()) != -1:
close_title_match = True
else:
close_title_match = False
break
for author in creators:
for token in author_tokens:
if author.lower().find(token.lower()) != -1:
close_author_match = True
else:
close_author_match = False
break
if close_author_match:
break
if close_title_match and close_author_match and int(formatid) in [1, 50, 410, 900] and thumbimage:
if subtitle and series:
close_matches.insert(0, self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid))
else:
close_matches.append(self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid))
if close_matches:
return close_matches[0]
else:
return ''
else:
return ''
def overdrive_get_record(self, br, q, ovrdrv_id):
search_url = q+'SearchResults.aspx?ReserveID={'+ovrdrv_id+'}'
results_url = q+'SearchResults.svc/GetResults?sEcho=1&iColumns=18&sColumns=ReserveID%2CTitle%2CSubtitle%2CEdition%2CSeries%2CPublisher%2CFormat%2CFormatID%2CCreators%2CThumbImage%2CShortDescription%2CWorldCatLink%2CExcerptLink%2CCreatorFile%2CSortTitle%2CAvailableToLibrary%2CAvailableToRetailer%2CRelevancyRank&iDisplayStart=0&iDisplayLength=10&sSearch=&bEscapeRegex=true&iSortingCols=1&iSortCol_0=17&sSortDir_0=asc'
# re-initialize the cookiejar to so that it's clean
clean_cj = mechanize.CookieJar()
br.set_cookiejar(clean_cj)
# get the base url to set the proper session cookie
br.open_novisit(q)
# initialize the search
self.safe_query(br, search_url)
# get the results
req = mechanize.Request(results_url)
req.add_header('X-Requested-With', 'XMLHttpRequest')
req.add_header('Referer', search_url)
req.add_header('Accept', 'application/json, text/javascript, */*')
raw = br.open_novisit(req)
raw = str(list(raw))
clean_cj = mechanize.CookieJar()
br.set_cookiejar(clean_cj)
return self.sort_ovrdrv_results(raw, None, None, None, ovrdrv_id)
def find_ovrdrv_data(self, br, title, author, isbn, ovrdrv_id=None):
q = base_url
if ovrdrv_id is None:
return self.overdrive_search(br, q, title, author)
else:
return self.overdrive_get_record(br, q, ovrdrv_id)
def to_ovrdrv_data(self, br, title=None, author=None, ovrdrv_id=None):
'''
Takes either a title/author combo or an Overdrive ID. One of these
two must be passed to this function.
'''
if ovrdrv_id is not None:
with cache_lock:
ans = ovrdrv_data_cache.get(ovrdrv_id, None)
if ans:
return ans
elif ans is False:
return None
else:
ovrdrv_data = self.find_ovrdrv_data(br, title, author, ovrdrv_id)
else:
try:
ovrdrv_data = self.find_ovrdrv_data(br, title, author, ovrdrv_id)
except:
import traceback
traceback.print_exc()
ovrdrv_data = None
with cache_lock:
ovrdrv_data_cache[ovrdrv_id] = ovrdrv_data if ovrdrv_data else False
return ovrdrv_data if ovrdrv_data else False
def parse_search_results(self, ovrdrv_data, mi):
'''
Parse the formatted search results from the initial Overdrive query and
add the values to the metadta.
The list object has these values:
[cover_url[0], social_metadata_url[1], worldcatlink[2], series[3], series_num[4],
publisher[5], creators[6], reserveid[7], title[8]]
'''
ovrdrv_id = ovrdrv_data[7]
mi.set_identifier('overdrive', ovrdrv_id)
if len(ovrdrv_data[3]) > 1:
mi.series = ovrdrv_data[3]
if ovrdrv_data[4]:
try:
mi.series_index = float(ovrdrv_data[4])
except:
pass
mi.publisher = ovrdrv_data[5]
mi.authors = ovrdrv_data[6]
mi.title = ovrdrv_data[8]
cover_url = ovrdrv_data[0]
if cover_url:
self.cache_identifier_to_cover_url(ovrdrv_id,
cover_url)
def get_book_detail(self, br, metadata_url, mi, ovrdrv_id, log):
try:
raw = br.open_novisit(metadata_url).read()
except Exception, e:
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
return False
raise
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
try:
root = soupparser.fromstring(raw)
except:
return False
pub_date = root.xpath("//div/label[@id='ctl00_ContentPlaceHolder1_lblPubDate']/text()")
lang = root.xpath("//div/label[@id='ctl00_ContentPlaceHolder1_lblLanguage']/text()")
subjects = root.xpath("//div/label[@id='ctl00_ContentPlaceHolder1_lblSubjects']/text()")
ebook_isbn = root.xpath("//td/label[@id='ctl00_ContentPlaceHolder1_lblIdentifier']/text()")
desc = root.xpath("//div/label[@id='ctl00_ContentPlaceHolder1_lblDescription']/ancestor::div[1]")
if pub_date:
from calibre.utils.date import parse_date
mi.pubdate = parse_date(pub_date[0].strip())
if lang:
mi.language = lang[0].strip()
if ebook_isbn:
#print "ebook isbn is "+str(ebook_isbn[0])
isbn = check_isbn(ebook_isbn[0].strip())
if isbn:
self.cache_isbn_to_identifier(isbn, ovrdrv_id)
mi.isbn = isbn
if subjects:
mi.tags = [tag.strip() for tag in subjects[0].split(',')]
if desc:
desc = desc[0]
desc = html.tostring(desc, method='html', encoding=unicode).strip()
# remove all attributes from tags
desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
# Remove comments
desc = re.sub(r'(?s)<!--.*?-->', '', desc)
mi.comments = sanitize_comments_html(desc)
return None
if __name__ == '__main__':
# To run these test use:
# calibre-debug -e src/calibre/ebooks/metadata/sources/overdrive.py
from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
title_test, authors_test)
test_identify_plugin(OverDrive.name,
[
(
{'title':'Foundation and Earth',
'authors':['Asimov']},
[title_test('Foundation and Earth', exact=True),
authors_test(['Isaac Asimov'])]
),
(
{'title': 'Elephants', 'authors':['Agatha']},
[title_test('Elephants Can Remember', exact=False),
authors_test(['Agatha Christie'])]
),
])

View File

@ -37,8 +37,6 @@ class EditMetadataAction(InterfaceAction):
md.addSeparator()
if test_eight_code:
dall = self.download_metadata
dident = partial(self.download_metadata, covers=False)
dcovers = partial(self.download_metadata, identify=False)
else:
dall = partial(self.download_metadata_old, False, covers=True)
dident = partial(self.download_metadata_old, False, covers=False)
@ -47,9 +45,9 @@ class EditMetadataAction(InterfaceAction):
md.addAction(_('Download metadata and covers'), dall,
Qt.ControlModifier+Qt.Key_D)
md.addAction(_('Download only metadata'), dident)
md.addAction(_('Download only covers'), dcovers)
if not test_eight_code:
md.addAction(_('Download only metadata'), dident)
md.addAction(_('Download only covers'), dcovers)
md.addAction(_('Download only social metadata'),
partial(self.download_metadata_old, False, covers=False,
set_metadata=False, set_social_metadata=True))
@ -80,7 +78,7 @@ class EditMetadataAction(InterfaceAction):
self.qaction.setEnabled(enabled)
self.action_merge.setEnabled(enabled)
def download_metadata(self, identify=True, covers=True, ids=None):
def download_metadata(self, ids=None):
if ids is None:
rows = self.gui.library_view.selectionModel().selectedRows()
if not rows or len(rows) == 0:
@ -90,7 +88,7 @@ class EditMetadataAction(InterfaceAction):
ids = [db.id(row.row()) for row in rows]
from calibre.gui2.metadata.bulk_download2 import start_download
start_download(self.gui, ids,
Dispatcher(self.bulk_metadata_downloaded), identify, covers)
Dispatcher(self.bulk_metadata_downloaded))
def bulk_metadata_downloaded(self, job):
if job.failed:

View File

@ -29,8 +29,7 @@ from calibre.ebooks.metadata.meta import set_metadata
from calibre.constants import DEBUG
from calibre.utils.config import prefs, tweaks
from calibre.utils.magick.draw import thumbnail
from calibre.library.save_to_disk import plugboard_any_device_value, \
plugboard_any_format_value
from calibre.library.save_to_disk import find_plugboard
# }}}
class DeviceJob(BaseJob): # {{{
@ -93,23 +92,6 @@ class DeviceJob(BaseJob): # {{{
# }}}
def find_plugboard(device_name, format, plugboards):
cpb = None
if format in plugboards:
cpb = plugboards[format]
elif plugboard_any_format_value in plugboards:
cpb = plugboards[plugboard_any_format_value]
if cpb is not None:
if device_name in cpb:
cpb = cpb[device_name]
elif plugboard_any_device_value in cpb:
cpb = cpb[plugboard_any_device_value]
else:
cpb = None
if DEBUG:
prints('Device using plugboard', format, device_name, cpb)
return cpb
def device_name_for_plugboards(device_class):
if hasattr(device_class, 'DEVICE_PLUGBOARD_NAME'):
return device_class.DEVICE_PLUGBOARD_NAME
@ -607,6 +589,13 @@ class DeviceMenu(QMenu): # {{{
class DeviceMixin(object): # {{{
#: This signal is emitted once, after metadata is downloaded from the
#: connected device.
#: The sequence: gui.device_manager.is_device_connected will become True,
#: then sometime later gui.device_metadata_available will be signaled.
#: This does not mean that there are no more jobs running. Automatic metadata
#: management might have kicked off a sync_booklists to write new metadata onto
#: the device, and that job might still be running when the signal is emitted.
device_metadata_available = pyqtSignal()
device_connection_changed = pyqtSignal(object)

View File

@ -12,7 +12,8 @@ from functools import partial
from itertools import izip
from PyQt4.Qt import (QIcon, QDialog, QVBoxLayout, QTextBrowser, QSize,
QDialogButtonBox, QApplication, QTimer, QLabel, QProgressBar)
QDialogButtonBox, QApplication, QTimer, QLabel, QProgressBar,
QGridLayout, QPixmap, Qt)
from calibre.gui2.dialogs.message_box import MessageBox
from calibre.gui2.threaded_jobs import ThreadedJob
@ -25,37 +26,86 @@ from calibre.ebooks.metadata.book.base import Metadata
from calibre.customize.ui import metadata_plugins
from calibre.ptempfile import PersistentTemporaryFile
# Start download {{{
def show_config(gui, parent):
from calibre.gui2.preferences import show_config_widget
show_config_widget('Sharing', 'Metadata download', parent=parent,
gui=gui, never_shutdown=True)
def start_download(gui, ids, callback, identify, covers):
q = MessageBox(MessageBox.QUESTION, _('Schedule download?'),
class ConfirmDialog(QDialog):
def __init__(self, ids, parent):
QDialog.__init__(self, parent)
self.setWindowTitle(_('Schedule download?'))
self.setWindowIcon(QIcon(I('dialog_question.png')))
l = self.l = QGridLayout()
self.setLayout(l)
i = QLabel(self)
i.setPixmap(QPixmap(I('dialog_question.png')))
l.addWidget(i, 0, 0)
t = QLabel(
'<p>'+_('The download of metadata for the <b>%d selected book(s)</b> will'
' run in the background. Proceed?')%len(ids) +
'<p>'+_('You can monitor the progress of the download '
'by clicking the rotating spinner in the bottom right '
'corner.') +
'<p>'+_('When the download completes you will be asked for'
' confirmation before calibre applies the downloaded metadata.'),
show_copy_button=False, parent=gui)
b = q.bb.addButton(_('Configure download'), q.bb.ActionRole)
b.setIcon(QIcon(I('config.png')))
b.clicked.connect(partial(show_config, gui, q))
q.det_msg_toggle.setVisible(False)
' confirmation before calibre applies the downloaded metadata.')
)
t.setWordWrap(True)
l.addWidget(t, 0, 1)
l.setColumnStretch(0, 1)
l.setColumnStretch(1, 100)
ret = q.exec_()
b.clicked.disconnect()
if ret != q.Accepted:
self.identify = self.covers = True
self.bb = QDialogButtonBox(QDialogButtonBox.Cancel)
self.bb.rejected.connect(self.reject)
b = self.bb.addButton(_('Download only &metadata'),
self.bb.AcceptRole)
b.clicked.connect(self.only_metadata)
b.setIcon(QIcon(I('edit_input.png')))
b = self.bb.addButton(_('Download only &covers'),
self.bb.AcceptRole)
b.clicked.connect(self.only_covers)
b.setIcon(QIcon(I('default_cover.png')))
b = self.b = self.bb.addButton(_('&Configure download'), self.bb.ActionRole)
b.setIcon(QIcon(I('config.png')))
b.clicked.connect(partial(show_config, parent, self))
l.addWidget(self.bb, 1, 0, 1, 2)
b = self.bb.addButton(_('Download &both'),
self.bb.AcceptRole)
b.clicked.connect(self.accept)
b.setDefault(True)
b.setAutoDefault(True)
b.setIcon(QIcon(I('ok.png')))
self.resize(self.sizeHint())
b.setFocus(Qt.OtherFocusReason)
def only_metadata(self):
self.covers = False
self.accept()
def only_covers(self):
self.identify = False
self.accept()
def start_download(gui, ids, callback):
d = ConfirmDialog(ids, gui)
ret = d.exec_()
d.b.clicked.disconnect()
if ret != d.Accepted:
return
job = ThreadedJob('metadata bulk download',
_('Download metadata for %d books')%len(ids),
download, (ids, gui.current_db, identify, covers), {}, callback)
download, (ids, gui.current_db, d.identify, d.covers), {}, callback)
gui.job_manager.run_threaded_job(job)
gui.status_bar.show_message(_('Metadata download started'), 3000)
# }}}
class ViewLog(QDialog): # {{{
@ -93,9 +143,10 @@ def view_log(job, parent):
# }}}
# Apply downloaded metadata {{{
class ApplyDialog(QDialog):
def __init__(self, id_map, gui):
def __init__(self, gui):
QDialog.__init__(self, gui)
self.l = l = QVBoxLayout()
@ -104,27 +155,33 @@ class ApplyDialog(QDialog):
self.pb = QProgressBar(self)
l.addWidget(self.pb)
self.pb.setMinimum(0)
self.pb.setMaximum(len(id_map))
self.bb = QDialogButtonBox(QDialogButtonBox.Cancel)
self.bb.rejected.connect(self.reject)
self.bb.accepted.connect(self.accept)
l.addWidget(self.bb)
self.gui = gui
self.timer = QTimer(self)
self.timer.timeout.connect(self.do_one)
def start(self, id_map):
self.id_map = list(id_map.iteritems())
self.current_idx = 0
self.failures = []
self.ids = []
self.canceled = False
QTimer.singleShot(20, self.do_one)
self.pb.setMinimum(0)
self.pb.setMaximum(len(id_map))
self.timer.start(50)
def do_one(self):
if self.canceled:
return
if self.current_idx >= len(self.id_map):
self.timer.stop()
self.finalize()
return
i, mi = self.id_map[self.current_idx]
db = self.gui.current_db
try:
@ -144,15 +201,11 @@ class ApplyDialog(QDialog):
pass
self.pb.setValue(self.pb.value()+1)
if self.current_idx >= len(self.id_map) - 1:
self.finalize()
else:
self.current_idx += 1
QTimer.singleShot(20, self.do_one)
self.current_idx += 1
def reject(self):
self.canceled = True
self.timer.stop()
QDialog.reject(self)
def finalize(self):
@ -169,17 +222,18 @@ class ApplyDialog(QDialog):
title += ' - ' + authors_to_string(authors)
msg.append(title+'\n\n'+tb+'\n'+('*'*80))
error_dialog(self, _('Some failures'),
parent = self if self.isVisible() else self.parent()
error_dialog(parent, _('Some failures'),
_('Failed to apply updated metadata for some books'
' in your library. Click "Show Details" to see '
'details.'), det_msg='\n\n'.join(msg), show=True)
self.accept()
if self.ids:
cr = self.gui.library_view.currentIndex().row()
self.gui.library_view.model().refresh_ids(
self.ids, cr)
if self.gui.cover_flow:
self.gui.cover_flow.dataChanged()
self.accept()
_amd = None
def apply_metadata(job, gui, q, result):
@ -217,8 +271,11 @@ def apply_metadata(job, gui, q, result):
'Do you want to proceed?'), det_msg='\n'.join(modified)):
return
_amd = ApplyDialog(id_map, gui)
_amd.exec_()
if _amd is None:
_amd = ApplyDialog(gui)
_amd.start(id_map)
if len(id_map) > 3:
_amd.exec_()
def proceed(gui, job):
gui.status_bar.show_message(_('Metadata download completed'), 3000)
@ -248,6 +305,8 @@ def proceed(gui, job):
q.show()
q.finished.connect(partial(apply_metadata, job, gui, q))
# }}}
def merge_result(oldmi, newmi):
dummy = Metadata(_('Unknown'))
for f in msprefs['ignore_fields']:

View File

@ -30,7 +30,6 @@ from calibre.ebooks.metadata.book.base import Metadata
from calibre.gui2 import error_dialog, NONE
from calibre.utils.date import utcnow, fromordinal, format_date
from calibre.library.comments import comments_to_html
from calibre.constants import islinux
from calibre import force_unicode
# }}}
@ -117,12 +116,10 @@ class CoverDelegate(QStyledItemDelegate): # {{{
def paint(self, painter, option, index):
QStyledItemDelegate.paint(self, painter, option, index)
if islinux:
# On linux for some reason the selected color is drawn on top of
# the decoration
style = QApplication.style()
style.drawItemPixmap(painter, option.rect, Qt.AlignTop|Qt.AlignHCenter,
QPixmap(index.data(Qt.DecorationRole)))
# Ensure the cover is rendered over any selection rect
style = QApplication.style()
style.drawItemPixmap(painter, option.rect, Qt.AlignTop|Qt.AlignHCenter,
QPixmap(index.data(Qt.DecorationRole)))
if self.timer.isActive() and index.data(Qt.UserRole).toBool():
rect = QRect(0, 0, self.spinner_width, self.spinner_width)
rect.moveCenter(option.rect.center())
@ -952,7 +949,7 @@ class CoverFetch(QDialog): # {{{
# }}}
if __name__ == '__main__':
DEBUG_DIALOG = True
#DEBUG_DIALOG = True
app = QApplication([])
d = FullFetch()
d.start(title='great gatsby', authors=['fitzgerald'])

View File

@ -337,7 +337,13 @@ def show_config_widget(category, name, gui=None, show_restart_msg=False,
bb.button(bb.RestoreDefaults).setEnabled(w.supports_restoring_to_defaults)
bb.button(bb.Apply).setEnabled(False)
bb.button(bb.Apply).clicked.connect(d.accept)
w.changed_signal.connect(lambda : bb.button(bb.Apply).setEnabled(True))
def onchange():
b = bb.button(bb.Apply)
b.setEnabled(True)
b.setDefault(True)
b.setAutoDefault(True)
w.changed_signal.connect(onchange)
bb.button(bb.Cancel).setFocus(True)
l = QVBoxLayout()
d.setLayout(l)
l.addWidget(w)

View File

@ -7,6 +7,7 @@ __docformat__ = 'restructuredtext en'
from PyQt4.Qt import Qt, QLineEdit, QComboBox, SIGNAL, QListWidgetItem
from calibre.customize.ui import is_disabled
from calibre.gui2 import error_dialog
from calibre.gui2.device import device_name_for_plugboards
from calibre.gui2.dialogs.template_dialog import TemplateDialog
@ -15,6 +16,8 @@ from calibre.gui2.preferences.plugboard_ui import Ui_Form
from calibre.customize.ui import metadata_writers, device_plugins
from calibre.library.save_to_disk import plugboard_any_format_value, \
plugboard_any_device_value, plugboard_save_to_disk_value
from calibre.library.server.content import plugboard_content_server_value, \
plugboard_content_server_formats
from calibre.utils.formatter import validation_formatter
@ -68,19 +71,26 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
self.device_label.setText(_('Device currently connected: None'))
self.devices = ['', 'APPLE', 'FOLDER_DEVICE']
self.device_to_formats_map = {}
for device in device_plugins():
n = device_name_for_plugboards(device)
self.device_to_formats_map[n] = device.FORMATS
if n not in self.devices:
self.devices.append(n)
self.devices.sort(cmp=lambda x, y: cmp(x.lower(), y.lower()))
self.devices.insert(1, plugboard_save_to_disk_value)
self.devices.insert(2, plugboard_any_device_value)
self.devices.insert(1, plugboard_content_server_value)
self.device_to_formats_map[plugboard_content_server_value] = \
plugboard_content_server_formats
self.devices.insert(1, plugboard_any_device_value)
self.new_device.addItems(self.devices)
self.formats = ['']
for w in metadata_writers():
for f in w.file_types:
self.formats.append(f)
if not is_disabled(w):
for f in w.file_types:
if not f in self.formats:
self.formats.append(f)
self.formats.append('device_db')
self.formats.sort()
self.formats.insert(1, plugboard_any_format_value)
@ -230,6 +240,15 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
show=True)
self.new_device.setCurrentIndex(0)
return
if self.current_device in self.device_to_formats_map:
allowable_formats = self.device_to_formats_map[self.current_device]
if self.current_format not in allowable_formats:
error_dialog(self, '',
_('The {0} device does not support the {1} format.').
format(self.current_device, self.current_format),
show=True)
self.new_device.setCurrentIndex(0)
return
self.set_fields()
def new_format_changed(self, txt):

View File

@ -8,6 +8,7 @@ __docformat__ = 'restructuredtext en'
import re
import time
import traceback
from contextlib import closing
from random import shuffle
from threading import Thread
@ -20,9 +21,12 @@ from calibre import browser
from calibre.gui2 import NONE
from calibre.gui2.progress_indicator import ProgressIndicator
from calibre.gui2.store.search_ui import Ui_Dialog
from calibre.library.caches import _match, CONTAINS_MATCH, EQUALS_MATCH, \
REGEXP_MATCH
from calibre.utils.config import DynamicConfig
from calibre.utils.icu import sort_key
from calibre.utils.magick.draw import thumbnail
from calibre.utils.search_query_parser import SearchQueryParser
HANG_TIME = 75000 # milliseconds seconds
TIMEOUT = 75 # seconds
@ -290,11 +294,15 @@ class SearchThread(Thread):
while self._run and not self.tasks.empty():
try:
query, store_name, store_plugin, timeout = self.tasks.get()
for res in store_plugin.search(query, timeout=timeout):
squery = query
for loc in SearchFilter.USABLE_LOCATIONS:
squery = re.sub(r'%s:"?(?P<a>[^\s"]+)"?' % loc, '\g<a>', squery)
for res in store_plugin.search(squery, timeout=timeout):
if not self._run:
return
res.store_name = store_name
self.results.put(res)
if SearchFilter(res).parse(query):
self.results.put(res)
self.tasks.task_done()
except:
pass
@ -450,3 +458,82 @@ class Matches(QAbstractItemModel):
if reset:
self.reset()
class SearchFilter(SearchQueryParser):
USABLE_LOCATIONS = [
'all',
'author',
'authors',
'cover',
'price',
'title',
'store',
]
def __init__(self, search_result):
SearchQueryParser.__init__(self, locations=self.USABLE_LOCATIONS)
self.search_result = search_result
def universal_set(self):
return set([self.search_result])
def get_matches(self, location, query):
location = location.lower().strip()
if location == 'authors':
location = 'author'
matchkind = CONTAINS_MATCH
if len(query) > 1:
if query.startswith('\\'):
query = query[1:]
elif query.startswith('='):
matchkind = EQUALS_MATCH
query = query[1:]
elif query.startswith('~'):
matchkind = REGEXP_MATCH
query = query[1:]
if matchkind != REGEXP_MATCH: ### leave case in regexps because it can be significant e.g. \S \W \D
query = query.lower()
if location not in self.USABLE_LOCATIONS:
return set([])
matches = set([])
all_locs = set(self.USABLE_LOCATIONS) - set(['all'])
locations = all_locs if location == 'all' else [location]
q = {
'author': self.search_result.author.lower(),
'cover': self.search_result.cover_url,
'format': '',
'price': self.search_result.price,
'store': self.search_result.store_name.lower(),
'title': self.search_result.title.lower(),
}
for x in ('author', 'format'):
q[x+'s'] = q[x]
for locvalue in locations:
ac_val = q[locvalue]
if query == 'true':
if ac_val is not None:
matches.add(self.search_result)
continue
if query == 'false':
if ac_val is None:
matches.add(self.search_result)
continue
try:
### Can't separate authors because comma is used for name sep and author sep
### Exact match might not get what you want. For that reason, turn author
### exactmatch searches into contains searches.
if locvalue == 'author' and matchkind == EQUALS_MATCH:
m = CONTAINS_MATCH
else:
m = matchkind
vals = [ac_val]
if _match(query, vals, m):
matches.add(self.search_result)
break
except ValueError: # Unicode errors
traceback.print_exc()
return matches

View File

@ -31,10 +31,14 @@ class NPWebView(QWebView):
proxy_parts = urlparse(http_proxy)
proxy = QNetworkProxy()
proxy.setType(QNetworkProxy.HttpProxy)
proxy.setUser(proxy_parts.username)
proxy.setPassword(proxy_parts.password)
proxy.setHostName(proxy_parts.hostname)
proxy.setPort(proxy_parts.port)
if proxy_parts.username:
proxy.setUser(proxy_parts.username)
if proxy_parts.password:
proxy.setPassword(proxy_parts.password)
if proxy_parts.hostname:
proxy.setHostName(proxy_parts.hostname)
if proxy_parts.port:
proxy.setPort(proxy_parts.port)
self.page().networkAccessManager().setProxy(proxy)
self.page().setForwardUnsupportedContent(True)

View File

@ -51,6 +51,23 @@ for x in FORMAT_ARG_DESCS:
FORMAT_ARGS[x] = ''
def find_plugboard(device_name, format, plugboards):
cpb = None
if format in plugboards:
cpb = plugboards[format]
elif plugboard_any_format_value in plugboards:
cpb = plugboards[plugboard_any_format_value]
if cpb is not None:
if device_name in cpb:
cpb = cpb[device_name]
elif plugboard_any_device_value in cpb:
cpb = cpb[plugboard_any_device_value]
else:
cpb = None
if DEBUG:
prints('Device using plugboard', format, device_name, cpb)
return cpb
def config(defaults=None):
if defaults is None:
c = Config('save_to_disk', _('Options to control saving to disk'))
@ -279,20 +296,7 @@ def do_save_book_to_disk(id_, mi, cover, plugboards,
written = False
for fmt in formats:
global plugboard_save_to_disk_value, plugboard_any_format_value
dev_name = plugboard_save_to_disk_value
cpb = None
if fmt in plugboards:
cpb = plugboards[fmt]
if dev_name in cpb:
cpb = cpb[dev_name]
else:
cpb = None
if cpb is None and plugboard_any_format_value in plugboards:
cpb = plugboards[plugboard_any_format_value]
if dev_name in cpb:
cpb = cpb[dev_name]
else:
cpb = None
cpb = find_plugboard(plugboard_save_to_disk_value, fmt, plugboards)
# Leave this here for a while, in case problems arise.
if cpb is not None:
prints('Save-to-disk using plugboard:', fmt, cpb)

View File

@ -12,9 +12,14 @@ import cherrypy
from calibre import fit_image, guess_type
from calibre.utils.date import fromtimestamp
from calibre.library.caches import SortKeyGenerator
from calibre.library.save_to_disk import find_plugboard
from calibre.utils.magick.draw import save_cover_data_to, Image, \
thumbnail as generate_thumbnail
plugboard_content_server_value = 'content_server'
plugboard_content_server_formats = ['epub']
class CSSortKeyGenerator(SortKeyGenerator):
def __init__(self, fields, fm, db_prefs):
@ -183,16 +188,30 @@ class ContentServer(object):
if fmt is None:
raise cherrypy.HTTPError(404, 'book: %d does not have format: %s'%(id, format))
if format == 'EPUB':
# Get the original metadata
mi = self.db.get_metadata(id, index_is_id=True)
# Get any EPUB plugboards for the content server
plugboards = self.db.prefs.get('plugboards', {})
cpb = find_plugboard(plugboard_content_server_value,
'epub', plugboards)
if cpb:
# Transform the metadata via the plugboard
newmi = mi.deepcopy_metadata()
newmi.template_to_attribute(mi, cpb)
else:
newmi = mi
# Write the updated file
from tempfile import TemporaryFile
from calibre.ebooks.metadata.meta import set_metadata
raw = fmt.read()
fmt = TemporaryFile()
fmt.write(raw)
fmt.seek(0)
set_metadata(fmt, self.db.get_metadata(id, index_is_id=True,
get_cover=True),
'epub')
set_metadata(fmt, newmi, 'epub')
fmt.seek(0)
mt = guess_type('dummy.'+format.lower())[0]
if mt is None:
mt = 'application/octet-stream'