Various improvements to Get Books. Also accurate APNX generation now handles the case of MOBI files that use the <div> tag for paragraphs

This commit is contained in:
Kovid Goyal 2011-04-21 18:45:59 -06:00
commit 6982d2ec24
22 changed files with 501 additions and 93 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

View File

@ -164,7 +164,7 @@ class APNXBuilder(object):
if c == '/': if c == '/':
closing = True closing = True
continue continue
elif c == 'p': elif c in ('d', 'p'):
if closing: if closing:
in_p = False in_p = False
else: else:

View File

@ -10,6 +10,7 @@ from functools import partial
from PyQt4.Qt import QMenu from PyQt4.Qt import QMenu
from calibre.gui2 import JSONConfig
from calibre.gui2.actions import InterfaceAction from calibre.gui2.actions import InterfaceAction
class StoreAction(InterfaceAction): class StoreAction(InterfaceAction):
@ -18,6 +19,8 @@ class StoreAction(InterfaceAction):
action_spec = (_('Get books'), 'store.png', None, None) action_spec = (_('Get books'), 'store.png', None, None)
def genesis(self): def genesis(self):
self.config = JSONConfig('store_action')
self.qaction.triggered.connect(self.search) self.qaction.triggered.connect(self.search)
self.store_menu = QMenu() self.store_menu = QMenu()
self.load_menu() self.load_menu()
@ -31,9 +34,34 @@ class StoreAction(InterfaceAction):
self.qaction.setMenu(self.store_menu) self.qaction.setMenu(self.store_menu)
def search(self): def search(self):
self.first_run_check()
from calibre.gui2.store.search import SearchDialog from calibre.gui2.store.search import SearchDialog
sd = SearchDialog(self.gui.istores, self.gui) sd = SearchDialog(self.gui.istores, self.gui)
sd.exec_() sd.exec_()
def open_store(self, store_plugin): def open_store(self, store_plugin):
self.first_run_check()
store_plugin.open(self.gui) store_plugin.open(self.gui)
def first_run_check(self):
    '''
    Show the one-time "About Get Books" notice.

    Does nothing when the ``first_run`` key of ``self.config`` is already
    False. Otherwise the key is set to False before the dialog is shown,
    so the notice appears at most once per configuration.
    '''
    if not self.config.get('first_run', True):
        return

    self.config['first_run'] = False
    from calibre.gui2 import info_dialog

    # Each entry becomes its own <p> paragraph in the dialog body.
    paragraphs = (
        _('Calibre helps you find the ebooks you want by searching '
          'the websites of a variety of commercial and public domain '
          'book sources for you.'),
        _('Using the integrated search you can easily find which '
          'store has the book you are looking for, at the best price. '
          'You will also get DRM status and other useful information.'),
        _('All transactions (paid or otherwise) are handled between '
          'you and the particular website. '
          'Calibre is not part of this process and any issues related '
          'to a purchase should be directed to the website you are '
          'buying from. Be sure to double check that any books you get '
          'will work with your e-book reader, especially if the book you '
          'are buying has '
          '<a href="http://drmfree.calibre-ebook.com/about#drm">DRM</a>.'
          ),
    )
    message = ''.join('<p>' + para for para in paragraphs)
    info_dialog(self.gui, _('About Get Books'), message,
            show=True, show_copy_button=False)

View File

@ -76,10 +76,16 @@ class StorePlugin(object): # {{{
return items as a generator. return items as a generator.
Don't be lazy with the search! Load as much data as possible in the Don't be lazy with the search! Load as much data as possible in the
:class:`calibre.gui2.store.search_result.SearchResult` object. If you have to parse :class:`calibre.gui2.store.search_result.SearchResult` object.
multiple pages to get all of the data then do so. However, if data (such as cover_url) However, if data (such as cover_url)
isn't available because the store does not display cover images then it's okay to isn't available because the store does not display cover images then it's okay to
ignore it. ignore it.
At the very least a :class:`calibre.gui2.store.search_result.SearchResult`
returned by this function must have the title, author and id.
If you have to parse multiple pages to get all of the data then implement
:meth:`get_details` for retrieving additional information.
Also, by default search results can only include ebooks. A plugin can offer users Also, by default search results can only include ebooks. A plugin can offer users
an option to include physical books in the search results but this must be an option to include physical books in the search results but this must be
@ -90,12 +96,33 @@ class StorePlugin(object): # {{{
:param query: The string query search with. :param query: The string query search with.
:param max_results: The maximum number of results to return. :param max_results: The maximum number of results to return.
:param timeout: The maximum amount of time in seconds to spend download the search results. :param timeout: The maximum amount of time in seconds to spend downloading data for search results.
:return: :class:`calibre.gui2.store.search_result.SearchResult` objects :return: :class:`calibre.gui2.store.search_result.SearchResult` objects
item_data is plugin specific and is used in :meth:`open` to open to a specifc place in the store. item_data is plugin specific and is used in :meth:`open` to open to a specifc place in the store.
''' '''
raise NotImplementedError() raise NotImplementedError()
def get_details(self, search_result, timeout=60):
    '''
    Fetch the slow-to-load details of a single search result.

    Plugins override this when information such as formats, DRM status
    or the cover URL is not part of the main search results and lives on
    a separate web page. Keeping that work out of the main search lets
    the title and author appear immediately while the remaining fields
    are filled in afterwards, which makes the search feel faster.

    This default implementation does nothing.

    :param search_result: The search result that needs its details set.
    :param timeout: The maximum amount of time in seconds to spend downloading details.

    :return: True if the search_result was modified otherwise False
    '''
    return False
def get_settings(self): def get_settings(self):
''' '''

View File

@ -168,5 +168,23 @@ class AmazonKindleStore(StorePlugin):
s.author = author.strip() s.author = author.strip()
s.price = price.strip() s.price = price.strip()
s.detail_item = asin.strip() s.detail_item = asin.strip()
s.formats = 'Kindle'
yield s yield s
def get_details(self, search_result, timeout):
    '''
    Set the DRM status of search_result from its Amazon product page.

    The page at http://amazon.com/dp/ + detail_item is inspected for a
    "Simultaneous Device Usage" entry:

      * no such entry             -> DRM_LOCKED
      * entry mentions "Unlimited" -> DRM_UNLOCKED
      * entry present otherwise    -> DRM_UNKNOWN

    :return: Always True.
    '''
    has_usage_entry = 'boolean(//div[@class="content"]//li/b[contains(text(), "Simultaneous Device Usage")])'
    usage_is_unlimited = 'boolean(//div[@class="content"]//li[contains(., "Unlimited") and contains(b, "Simultaneous Device Usage")])'

    br = browser()
    product_url = 'http://amazon.com/dp/' + search_result.detail_item
    with closing(br.open(product_url, timeout=timeout)) as nf:
        idata = html.fromstring(nf.read())

    if not idata.xpath(has_usage_entry):
        search_result.drm = SearchResult.DRM_LOCKED
    elif idata.xpath(usage_is_unlimited):
        search_result.drm = SearchResult.DRM_UNLOCKED
    else:
        search_result.drm = SearchResult.DRM_UNKNOWN
    return True

View File

@ -85,5 +85,7 @@ class BaenWebScriptionStore(BasicStoreConfig, StorePlugin):
s.author = author.strip() s.author = author.strip()
s.price = price s.price = price
s.detail_item = id.strip() s.detail_item = id.strip()
s.drm = SearchResult.DRM_UNLOCKED
s.formats = 'RB, MOBI, EPUB, LIT, LRF, RTF, HTML'
yield s yield s

View File

@ -60,14 +60,6 @@ class BeWriteStore(BasicStoreConfig, StorePlugin):
cover_url = '' cover_url = ''
price = '' price = ''
with closing(br.open(id.strip(), timeout=timeout/4)) as nf:
idata = html.fromstring(nf.read())
price = ''.join(idata.xpath('//div[@id="content"]//td[contains(text(), "ePub")]/text()'))
price = '$' + price.split('$')[-1]
cover_img = idata.xpath('//div[@id="content"]//img[1]/@src')
if cover_img:
cover_url = 'http://www.bewrite.net/mm5/' + cover_img[0]
counter -= 1 counter -= 1
s = SearchResult() s = SearchResult()
@ -76,5 +68,36 @@ class BeWriteStore(BasicStoreConfig, StorePlugin):
s.author = author.strip() s.author = author.strip()
s.price = price.strip() s.price = price.strip()
s.detail_item = id.strip() s.detail_item = id.strip()
s.drm = SearchResult.DRM_UNLOCKED
yield s yield s
def get_details(self, search_result, timeout):
    '''
    Fill in price, cover URL and formats from the book's own page.

    The page is the URL stored in search_result.detail_item. A format
    counts as available when a table cell inside the ``content`` div
    mentions its label. The price is taken from the first of the ePub,
    MOBI and PDF cells that has any text.

    :param search_result: The result to update in place.
    :param timeout: Seconds allowed for downloading the page.
    :return: Always True.
    '''
    cell_xpath = '//div[@id="content"]//td[contains(text(), "%s")]'
    # (label as written on the page, format name shown to the user).
    # The order is both the price lookup preference and the display order.
    labels = (('ePub', 'EPUB'), ('MOBI', 'MOBI'), ('PDF', 'PDF'))

    br = browser()
    with closing(br.open(search_result.detail_item, timeout=timeout)) as nf:
        idata = html.fromstring(nf.read())

    price = ''
    for label, fmt in labels:
        price = ''.join(idata.xpath(cell_xpath % label + '/text()'))
        if price:
            break
    # Only overwrite the price when a price cell was found; otherwise the
    # result would end up showing a bare '$'.
    if price:
        price = '$' + price.split('$')[-1]
        search_result.price = price.strip()

    cover_img = idata.xpath('//div[@id="content"]//img[1]/@src')
    if cover_img:
        cover_url = 'http://www.bewrite.net/mm5/' + cover_img[0]
        search_result.cover_url = cover_url.strip()

    # Build the list in a fixed order so the displayed string is stable
    # (joining a set gives an arbitrary order).
    formats = [fmt for label, fmt in labels
               if idata.xpath('boolean(%s)' % (cell_xpath % label))]
    search_result.formats = ', '.join(formats)

    return True

View File

@ -78,5 +78,7 @@ class BNStore(BasicStoreConfig, StorePlugin):
s.author = author.strip() s.author = author.strip()
s.price = price s.price = price
s.detail_item = id.strip() s.detail_item = id.strip()
s.drm = SearchResult.DRM_UNKNOWN
s.formats = 'Nook'
yield s yield s

View File

@ -75,6 +75,8 @@ class DieselEbooksStore(BasicStoreConfig, StorePlugin):
if price_elem: if price_elem:
price = price_elem[0] price = price_elem[0]
formats = ', '.join(data.xpath('.//td[@class="format"]/text()'))
counter -= 1 counter -= 1
s = SearchResult() s = SearchResult()
@ -83,5 +85,18 @@ class DieselEbooksStore(BasicStoreConfig, StorePlugin):
s.author = author.strip() s.author = author.strip()
s.price = price.strip() s.price = price.strip()
s.detail_item = '/item/' + id.strip() s.detail_item = '/item/' + id.strip()
s.formats = formats
yield s yield s
def get_details(self, search_result, timeout):
    '''
    Set the DRM status of search_result from its Diesel eBooks item page.

    The result is DRM_UNLOCKED when the ``format-info`` table has a row
    whose header contains "DRM" and whose value contains "No"; otherwise
    it is DRM_LOCKED.

    :param search_result: The result to update in place.
    :param timeout: Seconds allowed for downloading the page.
    :return: Always True.
    '''
    base = 'http://www.diesel-ebooks.com'

    # search() stores detail_item as '/item/<id>'. Appending that to a
    # base URL that already ends in '/item/' produced '/item//item/<id>'.
    # Accept a bare id as well so either form yields a single '/item/'.
    item_path = search_result.detail_item
    if not item_path.startswith('/item/'):
        item_path = '/item/' + item_path.lstrip('/')

    br = browser()
    with closing(br.open(base + item_path, timeout=timeout)) as nf:
        idata = html.fromstring(nf.read())

    if idata.xpath('boolean(//table[@class="format-info"]//tr[contains(th, "DRM") and contains(td, "No")])'):
        search_result.drm = SearchResult.DRM_UNLOCKED
    else:
        search_result.drm = SearchResult.DRM_LOCKED
    return True

View File

@ -7,6 +7,7 @@ __copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import random import random
import re
import urllib2 import urllib2
from contextlib import closing from contextlib import closing
@ -63,15 +64,6 @@ class EbookscomStore(BasicStoreConfig, StorePlugin):
id = id.split('=')[-1] id = id.split('=')[-1]
if not id: if not id:
continue continue
price = ''
with closing(br.open('http://www.ebooks.com/ebooks/book_display.asp?IID=' + id.strip(), timeout=timeout)) as fp:
pdoc = html.fromstring(fp.read())
pdata = pdoc.xpath('//table[@class="price"]/tr/td/text()')
if len(pdata) >= 2:
price = pdata[1]
if not price:
continue
cover_url = ''.join(data.xpath('.//img[1]/@src')) cover_url = ''.join(data.xpath('.//img[1]/@src'))
@ -89,7 +81,40 @@ class EbookscomStore(BasicStoreConfig, StorePlugin):
s.cover_url = cover_url s.cover_url = cover_url
s.title = title.strip() s.title = title.strip()
s.author = author.strip() s.author = author.strip()
s.price = price.strip()
s.detail_item = '?url=http://www.ebooks.com/cj.asp?IID=' + id.strip() + '&cjsku=' + id.strip() s.detail_item = '?url=http://www.ebooks.com/cj.asp?IID=' + id.strip() + '&cjsku=' + id.strip()
yield s yield s
def get_details(self, search_result, timeout):
    '''
    Fill in price, DRM status and formats from the ebooks.com book page.

    The numeric book id is recovered from the ``?IID=<digits>`` part of
    search_result.detail_item.

    :param search_result: The result to update in place.
    :param timeout: Seconds allowed for downloading the page.
    :return: True if the search_result was modified, False when no book
             id could be found in detail_item.
    '''
    url = 'http://www.ebooks.com/ebooks/book_display.asp?IID='

    # Without a match there is nothing to look up. The previous code fell
    # through to an unbound local here and raised UnboundLocalError.
    mo = re.search(r'\?IID=(?P<id>\d+)', search_result.detail_item or '')
    if mo is None:
        return False
    book_id = mo.group('id')

    price = _('Not Available')
    br = browser()
    with closing(br.open(url + book_id, timeout=timeout)) as nf:
        pdoc = html.fromstring(nf.read())

    pdata = pdoc.xpath('//table[@class="price"]/tr/td/text()')
    if len(pdata) >= 2:
        price = pdata[1]

    # Treat the book as locked as soon as any one of these permissions
    # is listed as "Off".
    search_result.drm = SearchResult.DRM_UNLOCKED
    for sec in ('Printing', 'Copying', 'Lending'):
        if pdoc.xpath('boolean(//div[@class="formatTableInner"]//table//tr[contains(th, "%s") and contains(td, "Off")])' % sec):
            search_result.drm = SearchResult.DRM_LOCKED
            break

    # The first column of the price table holds the format names; tidy
    # the joined text into a plain comma separated list.
    fdata = ', '.join(pdoc.xpath('//table[@class="price"]//tr//td[1]/text()'))
    fdata = fdata.replace(':', '')
    fdata = re.sub(r'\s{2,}', ' ', fdata)
    fdata = fdata.replace(' ,', ',')
    fdata = fdata.strip()
    search_result.formats = fdata

    search_result.price = price.strip()
    return True

View File

@ -7,6 +7,7 @@ __copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import random import random
import re
import urllib2 import urllib2
from contextlib import closing from contextlib import closing
@ -69,12 +70,35 @@ class EHarlequinStore(BasicStoreConfig, StorePlugin):
cover_url = ''.join(data.xpath('.//a[@href="%s"]/img/@src' % id)) cover_url = ''.join(data.xpath('.//a[@href="%s"]/img/@src' % id))
counter -= 1 counter -= 1
s = SearchResult() s = SearchResult()
s.cover_url = cover_url s.cover_url = cover_url
s.title = title.strip() s.title = title.strip()
s.author = author.strip() s.author = author.strip()
s.price = price.strip() s.price = price.strip()
s.detail_item = '?url=http://ebooks.eharlequin.com/' + id.strip() s.detail_item = '?url=http://ebooks.eharlequin.com/' + id.strip()
s.formats = 'EPUB'
yield s yield s
def get_details(self, search_result, timeout):
    '''
    Set the DRM status of search_result from its eHarlequin details page.

    The content id is recovered from the ``?ID=...`` part of
    search_result.detail_item. The status stays DRM_UNKNOWN unless the
    page has a ``drm_head`` div. In that case it is DRM_LOCKED when a
    table cell mentions both "Copy" and "not", and DRM_UNLOCKED otherwise.

    :param search_result: The result to update in place.
    :param timeout: Seconds allowed for downloading the page.
    :return: True if the search_result was modified, False when no
             content id could be found in detail_item.
    '''
    url = 'http://ebooks.eharlequin.com/en/ContentDetails.htm?ID='

    # Without a match there is nothing to look up. The previous code fell
    # through to an unbound local here and raised UnboundLocalError.
    mo = re.search(r'\?ID=(?P<id>.+)', search_result.detail_item or '')
    if mo is None:
        return False
    content_id = mo.group('id')

    br = browser()
    with closing(br.open(url + content_id, timeout=timeout)) as nf:
        idata = html.fromstring(nf.read())

    drm = SearchResult.DRM_UNKNOWN
    if idata.xpath('boolean(//div[@class="drm_head"])'):
        if idata.xpath('boolean(//td[contains(., "Copy") and contains(., "not")])'):
            drm = SearchResult.DRM_LOCKED
        else:
            drm = SearchResult.DRM_UNLOCKED
    search_result.drm = drm
    return True

View File

@ -72,8 +72,10 @@ class FeedbooksStore(BasicStoreConfig, StorePlugin):
title = ''.join(data.xpath('//h5//a/text()')) title = ''.join(data.xpath('//h5//a/text()'))
author = ''.join(data.xpath('//h6//a/text()')) author = ''.join(data.xpath('//h6//a/text()'))
price = ''.join(data.xpath('//a[@class="buy"]/text()')) price = ''.join(data.xpath('//a[@class="buy"]/text()'))
formats = 'EPUB'
if not price: if not price:
price = '$0.00' price = '$0.00'
formats = 'EPUB, MOBI, PDF'
cover_url = '' cover_url = ''
cover_url_img = data.xpath('//img') cover_url_img = data.xpath('//img')
if cover_url_img: if cover_url_img:
@ -88,5 +90,18 @@ class FeedbooksStore(BasicStoreConfig, StorePlugin):
s.author = author.strip() s.author = author.strip()
s.price = price.replace(' ', '').strip() s.price = price.replace(' ', '').strip()
s.detail_item = id.strip() s.detail_item = id.strip()
s.formats = formats
yield s yield s
def get_details(self, search_result, timeout):
    '''
    Set the DRM status of search_result from its Feedbooks mobile page.

    The book is marked DRM_LOCKED when the long description block holds
    a paragraph that mentions "DRM" or has a bold "Protection" label;
    otherwise it is marked DRM_UNLOCKED.

    :return: Always True.
    '''
    drm_notice = 'boolean(//div[contains(@class, "m-description-long")]//p[contains(., "DRM") or contains(b, "Protection")])'

    br = browser()
    page_url = url_slash_cleaner('http://m.feedbooks.com/' + search_result.detail_item)
    with closing(br.open(page_url, timeout=timeout)) as nf:
        idata = html.fromstring(nf.read())

    restricted = idata.xpath(drm_notice)
    search_result.drm = SearchResult.DRM_LOCKED if restricted else SearchResult.DRM_UNLOCKED
    return True

View File

@ -79,5 +79,15 @@ class GutenbergStore(BasicStoreConfig, StorePlugin):
s.author = author.strip() s.author = author.strip()
s.price = price.strip() s.price = price.strip()
s.detail_item = '/ebooks/' + id.strip() s.detail_item = '/ebooks/' + id.strip()
s.drm = SearchResult.DRM_UNLOCKED
yield s yield s
def get_details(self, search_result, timeout):
    '''
    Set the formats of search_result from its Project Gutenberg mobile page.

    The formats are the ``title`` span texts of every link whose type is
    not ``application/atom+xml``, joined with ", ".

    :param search_result: The result to update in place.
    :param timeout: Seconds allowed for downloading the page.
    :return: Always True.
    '''
    url = 'http://m.gutenberg.org/'

    # detail_item is stored as '/ebooks/<id>'. Join it with exactly one
    # slash instead of requesting 'http://m.gutenberg.org//ebooks/<id>'.
    page_url = url.rstrip('/') + '/' + search_result.detail_item.lstrip('/')

    br = browser()
    with closing(br.open(page_url, timeout=timeout)) as nf:
        idata = html.fromstring(nf.read())

    search_result.formats = ', '.join(idata.xpath('//a[@type!="application/atom+xml"]//span[@class="title"]/text()'))
    return True

View File

@ -63,7 +63,7 @@ class KoboStore(BasicStoreConfig, StorePlugin):
if not id: if not id:
continue continue
price = ''.join(data.xpath('.//span[@class="SCOurPrice"]/strong/text()')) price = ''.join(data.xpath('.//li[@class="OurPrice"]/strong/text()'))
if not price: if not price:
price = '$0.00' price = '$0.00'
@ -71,6 +71,7 @@ class KoboStore(BasicStoreConfig, StorePlugin):
title = ''.join(data.xpath('.//div[@class="SCItemHeader"]/h1/a[1]/text()')) title = ''.join(data.xpath('.//div[@class="SCItemHeader"]/h1/a[1]/text()'))
author = ''.join(data.xpath('.//div[@class="SCItemSummary"]/span/a[1]/text()')) author = ''.join(data.xpath('.//div[@class="SCItemSummary"]/span/a[1]/text()'))
drm = data.xpath('boolean(.//span[@class="SCAvailibilityFormatsText" and contains(text(), "DRM")])')
counter -= 1 counter -= 1
@ -80,5 +81,7 @@ class KoboStore(BasicStoreConfig, StorePlugin):
s.author = author.strip() s.author = author.strip()
s.price = price.strip() s.price = price.strip()
s.detail_item = '?url=http://www.kobobooks.com/' + id.strip() s.detail_item = '?url=http://www.kobobooks.com/' + id.strip()
s.drm = SearchResult.DRM_LOCKED if drm else SearchResult.DRM_UNLOCKED
s.formats = 'EPUB'
yield s yield s

View File

@ -89,5 +89,7 @@ class ManyBooksStore(BasicStoreConfig, StorePlugin):
s.author = author.strip() s.author = author.strip()
s.price = price.strip() s.price = price.strip()
s.detail_item = '/titles/' + id s.detail_item = '/titles/' + id
s.drm = SearchResult.DRM_UNLOCKED
s.formts = 'EPUB, PDB (eReader, PalmDoc, zTXT, Plucker, iSilo), FB2, ZIP, AZW, MOBI, PRC, LIT, PKG, PDF, TXT, RB, RTF, LRF, TCR, JAR'
yield s yield s

View File

@ -76,6 +76,7 @@ class MobileReadStore(BasicStoreConfig, StorePlugin):
matches = heapq.nlargest(max_results, matches) matches = heapq.nlargest(max_results, matches)
for score, book in matches: for score, book in matches:
book.price = '$0.00' book.price = '$0.00'
book.drm = SearchResult.DRM_UNLOCKED
yield book yield book
def update_book_list(self, timeout=10): def update_book_list(self, timeout=10):
@ -103,8 +104,8 @@ class MobileReadStore(BasicStoreConfig, StorePlugin):
for book_data in data.xpath('//ul/li'): for book_data in data.xpath('//ul/li'):
book = BookRef() book = BookRef()
book.detail_item = ''.join(book_data.xpath('.//a/@href')) book.detail_item = ''.join(book_data.xpath('.//a/@href'))
book.format = ''.join(book_data.xpath('.//i/text()')) book.formats = ''.join(book_data.xpath('.//i/text()'))
book.format = book.format.strip() book.formats = book.formats.strip()
text = ''.join(book_data.xpath('.//a/text()')) text = ''.join(book_data.xpath('.//a/text()'))
if ':' in text: if ':' in text:
@ -130,8 +131,6 @@ class BookRef(SearchResult):
def __init__(self): def __init__(self):
SearchResult.__init__(self) SearchResult.__init__(self)
self.format = ''
class MobeReadStoreDialog(QDialog, Ui_Dialog): class MobeReadStoreDialog(QDialog, Ui_Dialog):
@ -223,7 +222,7 @@ class BooksModel(QAbstractItemModel):
self.books = [] self.books = []
if self.filter: if self.filter:
for b in self.all_books: for b in self.all_books:
test = '%s %s %s' % (b.title, b.author, b.format) test = '%s %s %s' % (b.title, b.author, b.formats)
test = test.lower() test = test.lower()
include = True include = True
for item in self.filter.split(' '): for item in self.filter.split(' '):
@ -276,7 +275,7 @@ class BooksModel(QAbstractItemModel):
elif col == 1: elif col == 1:
return QVariant(result.author) return QVariant(result.author)
elif col == 2: elif col == 2:
return QVariant(result.format) return QVariant(result.formats)
return NONE return NONE
def data_as_text(self, result, col): def data_as_text(self, result, col):
@ -286,7 +285,7 @@ class BooksModel(QAbstractItemModel):
elif col == 1: elif col == 1:
text = result.author text = result.author
elif col == 2: elif col == 2:
text = result.format text = result.formats
return text return text
def sort(self, col, order, reset=True): def sort(self, col, order, reset=True):

View File

@ -68,5 +68,15 @@ class OpenLibraryStore(BasicStoreConfig, StorePlugin):
s.author = author.strip() s.author = author.strip()
s.price = price s.price = price
s.detail_item = id.strip() s.detail_item = id.strip()
s.drm = SearchResult.DRM_UNKNOWN
yield s yield s
def get_details(self, search_result, timeout):
    '''
    Set the formats of search_result from its Open Library page.

    The formats are the distinct link texts of every anchor whose title
    contains "Download", joined with ", ".

    :return: Always True.
    '''
    br = browser()
    page_url = url_slash_cleaner('http://openlibrary.org/' + search_result.detail_item)
    with closing(br.open(page_url, timeout=timeout)) as nf:
        idata = html.fromstring(nf.read())

    # A set drops the duplicate entries for the same format.
    download_labels = set(idata.xpath('//a[contains(@title, "Download")]/text()'))
    search_result.formats = ', '.join(download_labels)
    return True

View File

@ -21,6 +21,7 @@ from calibre import browser
from calibre.gui2 import NONE from calibre.gui2 import NONE
from calibre.gui2.progress_indicator import ProgressIndicator from calibre.gui2.progress_indicator import ProgressIndicator
from calibre.gui2.store.search_ui import Ui_Dialog from calibre.gui2.store.search_ui import Ui_Dialog
from calibre.gui2.store.search_result import SearchResult
from calibre.library.caches import _match, CONTAINS_MATCH, EQUALS_MATCH, \ from calibre.library.caches import _match, CONTAINS_MATCH, EQUALS_MATCH, \
REGEXP_MATCH REGEXP_MATCH
from calibre.utils.config import DynamicConfig from calibre.utils.config import DynamicConfig
@ -33,6 +34,14 @@ TIMEOUT = 75 # seconds
SEARCH_THREAD_TOTAL = 4 SEARCH_THREAD_TOTAL = 4
COVER_DOWNLOAD_THREAD_TOTAL = 2 COVER_DOWNLOAD_THREAD_TOTAL = 2
def comparable_price(text):
    '''
    Turn a displayed price into a zero padded string of digits that
    sorts and compares in numeric order.

    The value is normalised to two implied decimal places, stripped of
    every non-digit character and left padded with zeros to at least six
    characters, e.g. ``'$12.99'`` -> ``'001299'`` and ``'$5'`` ->
    ``'000500'``.

    :param text: The price as shown to the user. None and empty values
                 are treated as a price of zero.
    :return: A string containing only digits.
    '''
    text = (text or '').strip()

    two_decimals = len(text) >= 3 and text[-3] in ('.', ',')
    # A lone fractional digit such as '$1.5'. The digit before the
    # separator is required so prefixes like 'Rs.5' still count as whole
    # amounts.
    one_decimal = (not two_decimals and len(text) >= 3
            and text[-2] in ('.', ',')
            and text[-1].isdigit() and text[-3].isdigit())

    if one_decimal:
        text += '0'
    elif not two_decimals:
        text += '00'

    text = re.sub(r'\D', '', text)
    return text.rjust(6, '0')
class SearchDialog(QDialog, Ui_Dialog): class SearchDialog(QDialog, Ui_Dialog):
def __init__(self, istores, *args): def __init__(self, istores, *args):
@ -87,9 +96,13 @@ class SearchDialog(QDialog, Ui_Dialog):
# Author # Author
self.results_view.setColumnWidth(2,int(total*.35)) self.results_view.setColumnWidth(2,int(total*.35))
# Price # Price
self.results_view.setColumnWidth(3, int(total*.10)) self.results_view.setColumnWidth(3, int(total*.5))
# DRM
self.results_view.setColumnWidth(4, int(total*.5))
# Store # Store
self.results_view.setColumnWidth(4, int(total*.20)) self.results_view.setColumnWidth(5, int(total*.15))
# Formats
self.results_view.setColumnWidth(6, int(total*.5))
def do_search(self, checked=False): def do_search(self, checked=False):
# Stop all running threads. # Stop all running threads.
@ -102,6 +115,9 @@ class SearchDialog(QDialog, Ui_Dialog):
query = unicode(self.search_edit.text()) query = unicode(self.search_edit.text())
if not query.strip(): if not query.strip():
return return
# Give the query to the results model so it can do
# further filtering.
self.results_view.model().set_query(query)
# Plugins are in alphebetic order. Randomize the # Plugins are in alphebetic order. Randomize the
# order of plugin names. This way plugins closer # order of plugin names. This way plugins closer
@ -110,6 +126,8 @@ class SearchDialog(QDialog, Ui_Dialog):
store_names = self.store_plugins.keys() store_names = self.store_plugins.keys()
if not store_names: if not store_names:
return return
# Remove all of our internal filtering logic from the query.
query = self.clean_query(query)
shuffle(store_names) shuffle(store_names)
# Add plugins that the user has checked to the search pool's work queue. # Add plugins that the user has checked to the search pool's work queue.
for n in store_names: for n in store_names:
@ -121,6 +139,29 @@ class SearchDialog(QDialog, Ui_Dialog):
self.search_pool.start_threads() self.search_pool.start_threads()
self.pi.startAnimation() self.pi.startAnimation()
def clean_query(self, query):
    '''
    Reduce a filter expression to the plain text sent to the stores.

    The query is lower-cased and then, in order:

      * the modifier characters ``\\ ! = ~ > <`` are removed,
      * ``all:``, ``author:``, ``authors:`` and ``title:`` prefixes are
        dropped while their (optionally quoted) text is kept,
      * ``cover:``, ``drm:``, ``format:``, ``formats:``, ``price:`` and
        ``store:`` terms are removed together with their text,
      * the stand-alone words ``and``, ``not`` and ``or`` are removed,
      * runs of whitespace are collapsed and the ends are trimmed.

    :param query: The query exactly as typed by the user.
    :return: The cleaned query string.
    '''
    query = query.lower()

    # Remove control modifiers.
    for modifier in ('\\', '!', '=', '~', '>', '<'):
        query = query.replace(modifier, '')

    # Remove the prefix but keep the search text. The quoted form is
    # handled first so a multi-word phrase loses both of its quotes.
    # \b keeps a prefix from matching the tail of a longer word.
    for loc in ('all', 'author', 'authors', 'title'):
        query = re.sub(r'\b%s:"([^"]*)"' % loc, r'\1', query)
        query = re.sub(r'\b%s:"?(?P<a>[^\s"]+)"?' % loc, r'\g<a>', query)

    # Remove the prefix and search text. The quoted pattern has to accept
    # any number of characters between the quotes, not exactly one.
    for loc in ('cover', 'drm', 'format', 'formats', 'price', 'store'):
        query = re.sub(r'\b%s:"[^"]*"' % loc, '', query)
        query = re.sub(r'\b%s:[^\s]*' % loc, '', query)

    # Remove logic. Lookarounds leave the surrounding whitespace
    # unconsumed so that adjacent operators ('and or') are both removed.
    query = re.sub(r'(?<!\S)(and|not|or)(?!\S)', ' ', query)

    # Remove excess whitespace.
    query = re.sub(r'\s{2,}', ' ', query)
    return query.strip()
def save_state(self): def save_state(self):
self.config['store_search_geometry'] = self.saveGeometry() self.config['store_search_geometry'] = self.saveGeometry()
self.config['store_search_store_splitter_state'] = self.store_splitter.saveState() self.config['store_search_store_splitter_state'] = self.store_splitter.saveState()
@ -170,9 +211,9 @@ class SearchDialog(QDialog, Ui_Dialog):
self.pi.stopAnimation() self.pi.stopAnimation()
while self.search_pool.has_results(): while self.search_pool.has_results():
res = self.search_pool.get_result() res, store_plugin = self.search_pool.get_result()
if res: if res:
self.results_view.model().add_result(res) self.results_view.model().add_result(res, store_plugin)
def open_store(self, index): def open_store(self, index):
result = self.results_view.model().get_result(index) result = self.results_view.model().get_result(index)
@ -294,18 +335,14 @@ class SearchThread(Thread):
while self._run and not self.tasks.empty(): while self._run and not self.tasks.empty():
try: try:
query, store_name, store_plugin, timeout = self.tasks.get() query, store_name, store_plugin, timeout = self.tasks.get()
squery = query for res in store_plugin.search(query, timeout=timeout):
for loc in SearchFilter.USABLE_LOCATIONS:
squery = re.sub(r'%s:"?(?P<a>[^\s"]+)"?' % loc, '\g<a>', squery)
for res in store_plugin.search(squery, timeout=timeout):
if not self._run: if not self._run:
return return
res.store_name = store_name res.store_name = store_name
if SearchFilter(res).parse(query): self.results.put((res, store_plugin))
self.results.put(res)
self.tasks.task_done() self.tasks.task_done()
except: except:
pass traceback.print_exc()
class CoverThreadPool(GenericDownloadThreadPool): class CoverThreadPool(GenericDownloadThreadPool):
@ -349,29 +386,93 @@ class CoverThread(Thread):
continue continue
class DetailsThreadPool(GenericDownloadThreadPool):
    '''
    Pool of worker threads that fetch extra details for search results.

    Once started all threads run until abort is called.
    '''

    def add_task(self, search_result, store_plugin, update_callback, timeout=10):
        '''
        Queue a request for the details of search_result.

        The four arguments are placed on the task queue as one tuple, in
        the order given here.
        '''
        task = (search_result, store_plugin, update_callback, timeout)
        self.tasks.put(task)
class DetailsThread(Thread):
    '''
    Worker that pulls (search_result, store_plugin, callback, timeout)
    tuples off a shared queue, asks the plugin for the result's details
    and then hands the result to the callback.

    The thread polls the queue every tenth of a second until
    :meth:`abort` is called.
    '''

    def __init__(self, tasks, results):
        '''
        :param tasks: Queue of task tuples to process.
        :param results: Stored on the instance; not used by this class.
        '''
        Thread.__init__(self)
        self.daemon = True
        self.tasks = tasks
        self.results = results
        self._run = True

    def abort(self):
        '''Ask the thread to stop after the task it is working on.'''
        self._run = False

    def run(self):
        while self._run:
            time.sleep(.1)
            while self._run and not self.tasks.empty():
                # Another worker may have emptied the queue since the
                # check above. A blocking get() would then stall this
                # thread and make it ignore abort().
                try:
                    task = self.tasks.get_nowait()
                except Exception:
                    break
                try:
                    result, store_plugin, callback, timeout = task
                    if result:
                        store_plugin.get_details(result, timeout)
                        callback(result)
                except Exception:
                    # One failing store must not take the worker down,
                    # but the failure should not vanish silently either.
                    import traceback
                    traceback.print_exc()
                finally:
                    # Always balance the get() so Queue.join() cannot
                    # hang on a task that raised.
                    self.tasks.task_done()
class Matches(QAbstractItemModel): class Matches(QAbstractItemModel):
HEADERS = [_('Cover'), _('Title'), _('Author(s)'), _('Price'), _('Store')] HEADERS = [_('Cover'), _('Title'), _('Author(s)'), _('Price'), _('DRM'), _('Store'), _('Formats')]
def __init__(self): def __init__(self):
QAbstractItemModel.__init__(self) QAbstractItemModel.__init__(self)
self.DRM_LOCKED_ICON = QPixmap(I('drm-locked.png')).scaledToHeight(64)
self.DRM_UNLOCKED_ICON = QPixmap(I('drm-unlocked.png')).scaledToHeight(64)
self.DRM_UNKNOWN_ICON = QPixmap(I('dialog_warning.png')).scaledToHeight(64)
# All matches. Used to determine the order to display
# self.matches because the SearchFilter returns
# matches unordered.
self.all_matches = []
# Only the showing matches.
self.matches = [] self.matches = []
self.query = ''
self.search_filter = SearchFilter()
self.cover_pool = CoverThreadPool(CoverThread, 2) self.cover_pool = CoverThreadPool(CoverThread, 2)
self.cover_pool.start_threads() self.cover_pool.start_threads()
self.details_pool = DetailsThreadPool(DetailsThread, 4)
self.details_pool.start_threads()
def closing(self): def closing(self):
self.cover_pool.abort() self.cover_pool.abort()
self.details_pool.abort()
def clear_results(self): def clear_results(self):
self.all_matches = []
self.matches = [] self.matches = []
self.all_matches = []
self.search_filter.clear_search_results()
self.query = ''
self.cover_pool.abort() self.cover_pool.abort()
self.cover_pool.start_threads() self.cover_pool.start_threads()
self.details_pool.abort()
self.details_pool.start_threads()
self.reset() self.reset()
def add_result(self, result): def add_result(self, result, store_plugin):
self.layoutAboutToBeChanged.emit() self.layoutAboutToBeChanged.emit()
self.matches.append(result) self.all_matches.append(result)
self.cover_pool.add_task(result, self.update_result) self.search_filter.add_search_result(result)
if result.cover_url:
result.cover_queued = True
self.cover_pool.add_task(result, self.filter_results)
else:
result.cover_queued = False
self.details_pool.add_task(result, store_plugin, self.got_result_details)
self.filter_results()
self.layoutChanged.emit() self.layoutChanged.emit()
def get_result(self, index): def get_result(self, index):
@ -380,11 +481,30 @@ class Matches(QAbstractItemModel):
return self.matches[row] return self.matches[row]
else: else:
return None return None
def update_result(self): def filter_results(self):
self.layoutAboutToBeChanged.emit() self.layoutAboutToBeChanged.emit()
if self.query:
self.matches = list(self.search_filter.parse(self.query))
else:
self.matches = list(self.search_filter.universal_set())
self.reorder_matches()
self.layoutChanged.emit() self.layoutChanged.emit()
def got_result_details(self, result):
    '''
    Callback run once the details of result have been fetched.

    Queues a cover download if the details supplied a cover URL and none
    was queued yet, signals a data change for the result's row when it
    is currently shown, replaces an unrecognised DRM value with
    DRM_UNKNOWN and finally re-applies the current filter.
    '''
    needs_cover = result.cover_url and not result.cover_queued
    if needs_cover:
        result.cover_queued = True
        self.cover_pool.add_task(result, self.filter_results)

    if result in self.matches:
        row = self.matches.index(result)
        first_cell = self.index(row, 0)
        last_cell = self.index(row, self.columnCount() - 1)
        self.dataChanged.emit(first_cell, last_cell)

    known_states = (SearchResult.DRM_LOCKED, SearchResult.DRM_UNLOCKED, SearchResult.DRM_UNKNOWN)
    if result.drm not in known_states:
        result.drm = SearchResult.DRM_UNKNOWN

    self.filter_results()
def set_query(self, query):
    '''
    Store the query string used to filter the displayed results.

    :param query: The query text; an empty string means no filtering.
    '''
    self.query = query
def index(self, row, column, parent=QModelIndex()): def index(self, row, column, parent=QModelIndex()):
return self.createIndex(row, column) return self.createIndex(row, column)
@ -420,14 +540,41 @@ class Matches(QAbstractItemModel):
return QVariant(result.author) return QVariant(result.author)
elif col == 3: elif col == 3:
return QVariant(result.price) return QVariant(result.price)
elif col == 4: elif col == 5:
return QVariant(result.store_name) return QVariant(result.store_name)
elif col == 6:
return QVariant(result.formats)
return NONE return NONE
elif role == Qt.DecorationRole: elif role == Qt.DecorationRole:
if col == 0 and result.cover_data: if col == 0 and result.cover_data:
p = QPixmap() p = QPixmap()
p.loadFromData(result.cover_data) p.loadFromData(result.cover_data)
return QVariant(p) return QVariant(p)
if col == 4:
if result.drm == SearchResult.DRM_LOCKED:
return QVariant(self.DRM_LOCKED_ICON)
elif result.drm == SearchResult.DRM_UNLOCKED:
return QVariant(self.DRM_UNLOCKED_ICON)
elif result.drm == SearchResult.DRM_UNKNOWN:
return QVariant(self.DRM_UNKNOWN_ICON)
elif role == Qt.ToolTipRole:
if col == 1:
return QVariant('<p>%s</p>' % result.title)
elif col == 2:
return QVariant('<p>%s</p>' % result.author)
elif col == 3:
return QVariant('<p>' + _('Detected price as: %s. Check with the store before making a purchase to verify this price is correct. This price often does not include promotions the store may be running.') % result.price + '</p>')
elif col == 4:
if result.drm == SearchResult.DRM_LOCKED:
return QVariant('<p>' + _('This book as been detected as having DRM restrictions. This book may not work with your reader and you will have limitations placed upon you as to what you can do with this book. Check with the store before making any purchases to ensure you can actually read this book.') + '</p>')
elif result.drm == SearchResult.DRM_UNLOCKED:
return QVariant('<p>' + _('This book has been detected as being DRM Free. You should be able to use this book on any device provided it is in a format calibre supports for conversion. However, before making a purchase double check the DRM status with the store. The store may not be disclosing the use of DRM.') + '</p>')
else:
return QVariant('<p>' + _('The DRM status of this book could not be determined. There is a very high likelihood that this book is actually DRM restricted.') + '</p>')
elif col == 5:
return QVariant('<p>%s</p>' % result.store_name)
elif col == 6:
return QVariant('<p>%s</p>' % result.formats)
elif role == Qt.SizeHintRole: elif role == Qt.SizeHintRole:
return QSize(64, 64) return QSize(64, 64)
return NONE return NONE
@ -439,24 +586,33 @@ class Matches(QAbstractItemModel):
elif col == 2: elif col == 2:
text = result.author text = result.author
elif col == 3: elif col == 3:
text = result.price text = comparable_price(result.price)
if len(text) < 3 or text[-3] not in ('.', ','):
text += '00'
text = re.sub(r'\D', '', text)
text = text.rjust(6, '0')
elif col == 4: elif col == 4:
if result.drm == SearchResult.DRM_UNLOCKED:
text = 'a'
elif result.drm == SearchResult.DRM_LOCKED:
text = 'b'
else:
text = 'c'
elif col == 5:
text = result.store_name text = result.store_name
elif col == 6:
text = ', '.join(sorted(result.formats.split(',')))
return text return text
def sort(self, col, order, reset=True):
    """Sort the full result list by column ``col`` and re-sync the view.

    ``self.all_matches`` is sorted in place using the text returned by
    ``self.data_as_text`` for each result, then ``self.matches`` is
    re-ordered to follow it via ``self.reorder_matches()``.

    :param col: index of the column to sort by.
    :param order: sort direction; compared against ``Qt.DescendingOrder``.
    :param reset: when true, ``self.reset()`` is called after sorting.
    """
    # NOTE(review): the guard looks at the filtered list although the full
    # list is what gets sorted -- confirm that skipping the sort while the
    # filter matches nothing is intended.
    if not self.matches:
        return
    descending = order == Qt.DescendingOrder
    # Use keyword arguments: the positional (cmp, key, reverse) form of
    # list.sort only exists on Python 2, the keyword form works everywhere.
    self.all_matches.sort(
        key=lambda x: sort_key(unicode(self.data_as_text(x, col))),
        reverse=descending)
    self.reorder_matches()
    if reset:
        self.reset()
def reorder_matches(self):
    """Re-order ``self.matches`` to follow the order of ``self.all_matches``.

    ``self.matches`` is replaced by a new list holding the same items,
    sorted by the position each item has in ``self.all_matches``.

    Raises ``KeyError`` if an item of ``self.matches`` is missing from
    ``self.all_matches``.
    """
    # Build the position lookup once instead of calling list.index() for
    # every item, which made the re-ordering quadratic.
    # Assumes the results are hashable -- they are already stored in sets
    # by the search filter in this file.
    positions = {}
    for idx, item in enumerate(self.all_matches):
        # setdefault keeps the first position of a repeated item, which is
        # what list.index() would have returned.
        positions.setdefault(item, idx)
    self.matches = sorted(self.matches, key=positions.__getitem__)
class SearchFilter(SearchQueryParser): class SearchFilter(SearchQueryParser):
@ -466,22 +622,33 @@ class SearchFilter(SearchQueryParser):
'author', 'author',
'authors', 'authors',
'cover', 'cover',
'drm',
'format',
'formats',
'price', 'price',
'title', 'title',
'store', 'store',
] ]
def __init__(self):
    """Set up the query parser with an empty pool of search results.

    The parser is initialised with ``USABLE_LOCATIONS`` as its locations;
    results are added afterwards through ``add_search_result``.
    """
    SearchQueryParser.__init__(self, locations=self.USABLE_LOCATIONS)
    # Search results that queries are evaluated against.
    self.srs = set()
def add_search_result(self, search_result):
    """Add ``search_result`` to the pool of results held in ``self.srs``.

    The pool is a set, so adding the same result twice has no effect.
    """
    pool = self.srs
    pool.add(search_result)
def clear_search_results(self):
    """Forget every search result added so far.

    ``self.srs`` is rebound to a fresh empty set rather than cleared in
    place, so a set previously obtained from ``universal_set()`` is left
    untouched.
    """
    self.srs = set()
def universal_set(self):
    """Return the set of all search results held by this filter.

    The internal set object itself is returned, not a copy.
    """
    everything = self.srs
    return everything
def get_matches(self, location, query): def get_matches(self, location, query):
location = location.lower().strip() location = location.lower().strip()
if location == 'authors': if location == 'authors':
location = 'author' location = 'author'
elif location == 'formats':
location = 'format'
matchkind = CONTAINS_MATCH matchkind = CONTAINS_MATCH
if len(query) > 1: if len(query) > 1:
@ -502,38 +669,54 @@ class SearchFilter(SearchQueryParser):
all_locs = set(self.USABLE_LOCATIONS) - set(['all']) all_locs = set(self.USABLE_LOCATIONS) - set(['all'])
locations = all_locs if location == 'all' else [location] locations = all_locs if location == 'all' else [location]
q = { q = {
'author': self.search_result.author.lower(), 'author': lambda x: x.author.lower(),
'cover': self.search_result.cover_url, 'cover': lambda x: x.cover_url,
'format': '', 'drm': lambda x: x.drm,
'price': self.search_result.price, 'format': lambda x: x.formats,
'store': self.search_result.store_name.lower(), 'price': lambda x: comparable_price(x.price),
'title': self.search_result.title.lower(), 'store': lambda x: x.store_name.lower(),
'title': lambda x: x.title.lower(),
} }
for x in ('author', 'format'): for x in ('author', 'format'):
q[x+'s'] = q[x] q[x+'s'] = q[x]
for locvalue in locations: for sr in self.srs:
ac_val = q[locvalue] for locvalue in locations:
if query == 'true': accessor = q[locvalue]
if ac_val is not None: if query == 'true':
matches.add(self.search_result) if locvalue == 'drm':
continue if accessor(sr) == SearchResult.DRM_LOCKED:
if query == 'false': matches.add(sr)
if ac_val is None: else:
matches.add(self.search_result) if accessor(sr) is not None:
continue matches.add(sr)
try: continue
### Can't separate authors because comma is used for name sep and author sep if query == 'false':
### Exact match might not get what you want. For that reason, turn author if locvalue == 'drm':
### exactmatch searches into contains searches. if accessor(sr) == SearchResult.DRM_UNKNOWN:
if locvalue == 'author' and matchkind == EQUALS_MATCH: matches.add(sr)
m = CONTAINS_MATCH else:
else: if accessor(sr) is None:
m = matchkind matches.add(sr)
continue
vals = [ac_val] # this is bool, so can't match below
if _match(query, vals, m): if locvalue == 'drm':
matches.add(self.search_result) continue
break try:
except ValueError: # Unicode errors ### Can't separate authors because comma is used for name sep and author sep
traceback.print_exc() ### Exact match might not get what you want. For that reason, turn author
### exactmatch searches into contains searches.
if locvalue == 'author' and matchkind == EQUALS_MATCH:
m = CONTAINS_MATCH
else:
m = matchkind
if locvalue == 'format':
vals = accessor(sr).split(',')
else:
vals = [accessor(sr)]
if _match(query, vals, m):
matches.add(sr)
break
except ValueError: # Unicode errors
traceback.print_exc()
return matches return matches

View File

@ -11,7 +11,11 @@
</rect> </rect>
</property> </property>
<property name="windowTitle"> <property name="windowTitle">
<string>calibre Store Search</string> <string>Get Books</string>
</property>
<property name="windowIcon">
<iconset resource="../../../../resources/images.qrc">
<normaloff>:/images/store.png</normaloff>:/images/store.png</iconset>
</property> </property>
<property name="sizeGripEnabled"> <property name="sizeGripEnabled">
<bool>true</bool> <bool>true</bool>
@ -58,8 +62,8 @@
<rect> <rect>
<x>0</x> <x>0</x>
<y>0</y> <y>0</y>
<width>215</width> <width>170</width>
<height>116</height> <height>138</height>
</rect> </rect>
</property> </property>
</widget> </widget>
@ -174,7 +178,9 @@
</item> </item>
</layout> </layout>
</widget> </widget>
<resources/> <resources>
<include location="../../../../resources/images.qrc"/>
</resources>
<connections> <connections>
<connection> <connection>
<sender>close</sender> <sender>close</sender>

View File

@ -8,6 +8,10 @@ __docformat__ = 'restructuredtext en'
class SearchResult(object): class SearchResult(object):
DRM_LOCKED = 1
DRM_UNLOCKED = 2
DRM_UNKNOWN = 3
def __init__(self): def __init__(self):
self.store_name = '' self.store_name = ''
self.cover_url = '' self.cover_url = ''
@ -16,3 +20,5 @@ class SearchResult(object):
self.author = '' self.author = ''
self.price = '' self.price = ''
self.detail_item = '' self.detail_item = ''
self.drm = None
self.formats = ''

View File

@ -90,5 +90,15 @@ class SmashwordsStore(BasicStoreConfig, StorePlugin):
s.author = author.strip() s.author = author.strip()
s.price = price.strip() s.price = price.strip()
s.detail_item = '/books/view/' + id.strip() s.detail_item = '/books/view/' + id.strip()
s.drm = SearchResult.DRM_UNLOCKED
yield s yield s
def get_details(self, search_result, timeout):
    """Fetch the book's detail page and fill in ``search_result.formats``.

    The page at ``search_result.detail_item`` is downloaded and the
    distinct text nodes matched by ``//td//b//text()`` are stored, sorted
    and joined with ``', '``, in ``search_result.formats``.

    :param search_result: result to update; its ``detail_item`` is read
        and its ``formats`` attribute is overwritten.
    :param timeout: timeout passed to the browser's ``open`` call.
    :return: always ``True``.
    """
    # detail_item is built with a leading '/', so strip it before joining
    # to avoid requesting 'http://www.smashwords.com//books/...'.
    url = 'http://www.smashwords.com/' + search_result.detail_item.lstrip('/')
    br = browser()
    with closing(br.open(url, timeout=timeout)) as nf:
        idata = html.fromstring(nf.read())
    # Sort the de-duplicated names so the result does not depend on set
    # iteration order.
    names = set(idata.xpath('//td//b//text()'))
    search_result.formats = ', '.join(sorted(names))
    return True