diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index 9d599c9cf0..81c8027bb4 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -641,50 +641,6 @@ def url_slash_cleaner(url): ''' return re.sub(r'(? tag from the + # detail odps page but this is easier. + id = ''.join(data.xpath('./*[local-name() = "id"]/text()')).strip() + s.detail_item = url_slash_cleaner('%s/ebooks/%s' % (web_url, re.sub('[^\d]', '', id))) + if not s.detail_item: + continue + + s.title = ' '.join(data.xpath('./*[local-name() = "title"]//text()')).strip() + s.author = ', '.join(data.xpath('./*[local-name() = "content"]//text()')).strip() + if not s.title or not s.author: + continue + + # Get the formats and direct download links. + with closing(br.open(id, timeout=timeout/4)) as nf: + ndoc = etree.fromstring(nf.read()) + for link in ndoc.xpath('//*[local-name() = "link" and @rel = "http://opds-spec.org/acquisition"]'): + type = link.get('type') + href = link.get('href') + if type: + ext = mimetypes.guess_extension(type) + if ext: + ext = ext[1:].upper().strip() + s.downloads[ext] = href + + s.formats = ', '.join(s.downloads.keys()) + if not s.formats: + continue + + for link in data.xpath('./*[local-name() = "link"]'): + rel = link.get('rel') + href = link.get('href') + type = link.get('type') + + if rel and href and type: + if rel in ('http://opds-spec.org/thumbnail', 'http://opds-spec.org/image/thumbnail'): + if href.startswith('data:image/png;base64,'): + s.cover_data = base64.b64decode(href.replace('data:image/png;base64,', '')) + + yield s + class GutenbergStore(BasicStoreConfig, OpenSearchOPDSStore): open_search_url = 'http://www.gutenberg.org/catalog/osd-books.xml' - web_url = 'http://m.gutenberg.org/' + web_url = web_url def search(self, query, max_results=10, timeout=60): ''' @@ -48,57 +105,5 @@ class GutenbergStore(BasicStoreConfig, OpenSearchOPDSStore): * Images are not links but base64 encoded strings. 
They are also not real cover images but a little blue book thumbnail. ''' - - url = 'http://m.gutenberg.org/ebooks/search.opds/?query=' + urllib.quote_plus(query) - - counter = max_results - br = browser(user_agent='calibre/'+__version__) - with closing(br.open(url, timeout=timeout)) as f: - doc = etree.fromstring(f.read()) - for data in doc.xpath('//*[local-name() = "entry"]'): - if counter <= 0: - break - - counter -= 1 - - s = SearchResult() - - # We could use the tag from the - # detail odps page but this is easier. - id = ''.join(data.xpath('./*[local-name() = "id"]/text()')).strip() - s.detail_item = url_slash_cleaner('%s/ebooks/%s' % (self.web_url, re.sub('[^\d]', '', id))) - if not s.detail_item: - continue - - s.title = ' '.join(data.xpath('./*[local-name() = "title"]//text()')).strip() - s.author = ', '.join(data.xpath('./*[local-name() = "content"]//text()')).strip() - if not s.title or not s.author: - continue - - # Get the formats and direct download links. - with closing(br.open(id, timeout=timeout/4)) as nf: - ndoc = etree.fromstring(nf.read()) - for link in ndoc.xpath('//*[local-name() = "link" and @rel = "http://opds-spec.org/acquisition"]'): - type = link.get('type') - href = link.get('href') - if type: - ext = mimetypes.guess_extension(type) - if ext: - ext = ext[1:].upper().strip() - s.downloads[ext] = href - - s.formats = ', '.join(s.downloads.keys()) - if not s.formats: - continue - - for link in data.xpath('./*[local-name() = "link"]'): - rel = link.get('rel') - href = link.get('href') - type = link.get('type') - - if rel and href and type: - if rel in ('http://opds-spec.org/thumbnail', 'http://opds-spec.org/image/thumbnail'): - if href.startswith('data:image/png;base64,'): - s.cover_data = base64.b64decode(href.replace('data:image/png;base64,', '')) - - yield s + for result in search(query, max_results, timeout): + yield result diff --git a/src/calibre/gui2/store/web_control.py b/src/calibre/gui2/store/web_control.py index 
c2f38f2bd4..49f7f5634d 100644
--- a/src/calibre/gui2/store/web_control.py
+++ b/src/calibre/gui2/store/web_control.py
@@ -12,11 +12,12 @@ from urlparse import urlparse
 from PyQt5.Qt import QNetworkCookieJar, QNetworkProxy, QUrl
 from PyQt5.QtWebKitWidgets import QWebView, QWebPage
 
-from calibre import USER_AGENT, get_proxies, get_download_filename
+from calibre import USER_AGENT, get_proxies
 from calibre.ebooks import BOOK_EXTENSIONS
 from calibre.gui2 import choose_save_file
 from calibre.ptempfile import PersistentTemporaryFile
 from calibre.utils.filenames import ascii_filename
+from calibre.web import get_download_filename
 
 
 class NPWebView(QWebView):
diff --git a/src/calibre/web/__init__.py b/src/calibre/web/__init__.py
index b14dc0ce28..761da488f5 100644
--- a/src/calibre/web/__init__.py
+++ b/src/calibre/web/__init__.py
@@ -5,3 +5,72 @@ __copyright__ = '2008, Kovid Goyal '
 class Recipe(object):
     pass
 
+def get_download_filename_from_response(response):
+    '''
+    Work out a filename for an already opened response.
+
+    The name comes from the Content-Disposition header when it carries one
+    (the extended ``filename*`` parameter wins over plain ``filename``);
+    otherwise the last path component of the response URL is used.
+
+    :param response: Object providing ``geturl()`` and ``info()``.
+    :return: The filename, possibly an empty string. Errors raised while
+        inspecting the response are printed, not propagated.
+    '''
+    from cgi import parse_header
+    from urllib2 import unquote as urllib2_unquote
+    filename = last_part_name = ''
+    try:
+        last_part_name = response.geturl().split('/')[-1]
+        disposition = response.info().get('Content-disposition', '')
+        # The dummy leading token makes every name=value segment get parsed
+        # as a parameter, even if the header lacks a disposition type.
+        params = parse_header('x;' + disposition)[1]
+        # A filename* value has the form charset'language'encoded-name
+        filename = params.get('filename*', '').split('\'')[-1]
+        if not filename:
+            filename = params.get('filename', '')
+        # Slices, not indexing, so an empty value cannot raise IndexError
+        if filename[:1] in ('\'', '"'):
+            filename = filename[1:]
+        if filename[-1:] in ('\'', '"'):
+            filename = filename[:-1]
+        filename = urllib2_unquote(filename)
+    except Exception:
+        import traceback
+        traceback.print_exc()
+    return filename or last_part_name
+
+
+def get_download_filename(url, cookie_file=None):
+    '''
+    Get a local filename for a URL using the content disposition header.
+    Returns empty string if an error occurs while fetching the URL.
+
+    :param url: The URL to open.
+    :param cookie_file: Optional path of a Mozilla format cookie file to
+        load before opening the URL. Errors loading it are propagated.
+    '''
+    from calibre import browser
+    from contextlib import closing
+
+    filename = ''
+
+    br = browser()
+    if cookie_file:
+        from mechanize import MozillaCookieJar
+        cj = MozillaCookieJar()
+        cj.load(cookie_file)
+        br.set_cookiejar(cj)
+
+    try:
+        with closing(br.open(url)) as r:
+            filename = get_download_filename_from_response(r)
+    except Exception:
+        # Deliberately not a bare except, so that KeyboardInterrupt and
+        # SystemExit are not swallowed.
+        import traceback
+        traceback.print_exc()
+
+    return filename
+