diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py
index 9d599c9cf0..81c8027bb4 100644
--- a/src/calibre/__init__.py
+++ b/src/calibre/__init__.py
@@ -641,50 +641,6 @@ def url_slash_cleaner(url):
     '''
     return re.sub(r'(?<!:)/{2,}', '/', url)
 
-def get_download_filename_from_response(response):
-    from urllib2 import unquote as urllib2_unquote
-    filename = last_part_name = ''
-    try:
-        last_part_name = response.geturl().split('/')[-1]
-        disposition = response.info().get('Content-disposition', '')
-        for p in disposition.split(';'):
-            if 'filename' in p:
-                if '*=' in disposition:
-                    parts = disposition.split('*=')[-1]
-                    filename = parts.split('\'')[-1]
-                else:
-                    filename = disposition.split('=')[-1]
-                    if filename[0] in ('\'', '"'):
-                        filename = filename[1:]
-                    if filename[-1] in ('\'', '"'):
-                        filename = filename[:-1]
-                filename = urllib2_unquote(filename)
-                break
-    except Exception:
-        import traceback
-        traceback.print_exc()
-    return filename or last_part_name
-
-
-def get_download_filename(url, cookie_file=None):
-    '''
-    Get a local filename for a URL using the content disposition header
-    Returns empty string if an error occurs.
-    '''
-    from contextlib import closing
-
-    filename = ''
-
-    br = browser()
-    if cookie_file:
-        from mechanize import MozillaCookieJar
-        cj = MozillaCookieJar()
-        cj.load(cookie_file)
-        br.set_cookiejar(cj)
-
-    try:
-        with closing(br.open(url)) as r:
-            filename = get_download_filename_from_response(r)
-    except:
-        import traceback
-        traceback.print_exc()
-
-    return filename
-
diff --git a/src/calibre/gui2/store/stores/gutenberg_plugin.py b/src/calibre/gui2/store/stores/gutenberg_plugin.py
--- a/src/calibre/gui2/store/stores/gutenberg_plugin.py
+++ b/src/calibre/gui2/store/stores/gutenberg_plugin.py
@@ -26,6 +26,63 @@
+web_url = 'http://m.gutenberg.org'
+
+
+def search(query, max_results=10, timeout=60):
+    url = 'http://m.gutenberg.org/ebooks/search.opds/?query=' + urllib.quote_plus(query)
+
+    counter = max_results
+    br = browser(user_agent='calibre/'+__version__)
+    with closing(br.open(url, timeout=timeout)) as f:
+        doc = etree.fromstring(f.read())
+        for data in doc.xpath('//*[local-name() = "entry"]'):
+            if counter <= 0:
+                break
+
+            counter -= 1
+
+            s = SearchResult()
+
+            # We could use the <link rel="alternate" type="text/html" ...> tag from the
+            # detail odps page but this is easier.
+ id = ''.join(data.xpath('./*[local-name() = "id"]/text()')).strip()
+ s.detail_item = url_slash_cleaner('%s/ebooks/%s' % (web_url, re.sub('[^\d]', '', id)))
+ if not s.detail_item:
+ continue
+
+ s.title = ' '.join(data.xpath('./*[local-name() = "title"]//text()')).strip()
+ s.author = ', '.join(data.xpath('./*[local-name() = "content"]//text()')).strip()
+ if not s.title or not s.author:
+ continue
+
+ # Get the formats and direct download links.
+ with closing(br.open(id, timeout=timeout/4)) as nf:
+ ndoc = etree.fromstring(nf.read())
+ for link in ndoc.xpath('//*[local-name() = "link" and @rel = "http://opds-spec.org/acquisition"]'):
+ type = link.get('type')
+ href = link.get('href')
+ if type:
+ ext = mimetypes.guess_extension(type)
+ if ext:
+ ext = ext[1:].upper().strip()
+ s.downloads[ext] = href
+
+ s.formats = ', '.join(s.downloads.keys())
+ if not s.formats:
+ continue
+
+ for link in data.xpath('./*[local-name() = "link"]'):
+ rel = link.get('rel')
+ href = link.get('href')
+ type = link.get('type')
+
+ if rel and href and type:
+ if rel in ('http://opds-spec.org/thumbnail', 'http://opds-spec.org/image/thumbnail'):
+ if href.startswith('data:image/png;base64,'):
+ s.cover_data = base64.b64decode(href.replace('data:image/png;base64,', ''))
+
+ yield s
+
class GutenbergStore(BasicStoreConfig, OpenSearchOPDSStore):
open_search_url = 'http://www.gutenberg.org/catalog/osd-books.xml'
- web_url = 'http://m.gutenberg.org/'
+ web_url = web_url
def search(self, query, max_results=10, timeout=60):
'''
@@ -48,57 +105,5 @@ class GutenbergStore(BasicStoreConfig, OpenSearchOPDSStore):
* Images are not links but base64 encoded strings. They are also not
real cover images but a little blue book thumbnail.
'''
-
- url = 'http://m.gutenberg.org/ebooks/search.opds/?query=' + urllib.quote_plus(query)
-
- counter = max_results
- br = browser(user_agent='calibre/'+__version__)
- with closing(br.open(url, timeout=timeout)) as f:
- doc = etree.fromstring(f.read())
- for data in doc.xpath('//*[local-name() = "entry"]'):
- if counter <= 0:
- break
-
- counter -= 1
-
- s = SearchResult()
-
-                # We could use the <link rel="alternate" type="text/html" ...> tag from the
- # detail odps page but this is easier.
- id = ''.join(data.xpath('./*[local-name() = "id"]/text()')).strip()
- s.detail_item = url_slash_cleaner('%s/ebooks/%s' % (self.web_url, re.sub('[^\d]', '', id)))
- if not s.detail_item:
- continue
-
- s.title = ' '.join(data.xpath('./*[local-name() = "title"]//text()')).strip()
- s.author = ', '.join(data.xpath('./*[local-name() = "content"]//text()')).strip()
- if not s.title or not s.author:
- continue
-
- # Get the formats and direct download links.
- with closing(br.open(id, timeout=timeout/4)) as nf:
- ndoc = etree.fromstring(nf.read())
- for link in ndoc.xpath('//*[local-name() = "link" and @rel = "http://opds-spec.org/acquisition"]'):
- type = link.get('type')
- href = link.get('href')
- if type:
- ext = mimetypes.guess_extension(type)
- if ext:
- ext = ext[1:].upper().strip()
- s.downloads[ext] = href
-
- s.formats = ', '.join(s.downloads.keys())
- if not s.formats:
- continue
-
- for link in data.xpath('./*[local-name() = "link"]'):
- rel = link.get('rel')
- href = link.get('href')
- type = link.get('type')
-
- if rel and href and type:
- if rel in ('http://opds-spec.org/thumbnail', 'http://opds-spec.org/image/thumbnail'):
- if href.startswith('data:image/png;base64,'):
- s.cover_data = base64.b64decode(href.replace('data:image/png;base64,', ''))
-
- yield s
+ for result in search(query, max_results, timeout):
+ yield result
diff --git a/src/calibre/gui2/store/web_control.py b/src/calibre/gui2/store/web_control.py
index c2f38f2bd4..49f7f5634d 100644
--- a/src/calibre/gui2/store/web_control.py
+++ b/src/calibre/gui2/store/web_control.py
@@ -12,11 +12,12 @@ from urlparse import urlparse
from PyQt5.Qt import QNetworkCookieJar, QNetworkProxy, QUrl
from PyQt5.QtWebKitWidgets import QWebView, QWebPage
-from calibre import USER_AGENT, get_proxies, get_download_filename
+from calibre import USER_AGENT, get_proxies
from calibre.ebooks import BOOK_EXTENSIONS
from calibre.gui2 import choose_save_file
from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.filenames import ascii_filename
+from calibre.web import get_download_filename
class NPWebView(QWebView):
diff --git a/src/calibre/web/__init__.py b/src/calibre/web/__init__.py
index b14dc0ce28..761da488f5 100644
--- a/src/calibre/web/__init__.py
+++ b/src/calibre/web/__init__.py
@@ -5,3 +5,54 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
class Recipe(object):
pass
+def get_download_filename_from_response(response):
+ from urllib2 import unquote as urllib2_unquote
+ filename = last_part_name = ''
+ try:
+ last_part_name = response.geturl().split('/')[-1]
+ disposition = response.info().get('Content-disposition', '')
+ for p in disposition.split(';'):
+ if 'filename' in p:
+ if '*=' in disposition:
+ parts = disposition.split('*=')[-1]
+ filename = parts.split('\'')[-1]
+ else:
+ filename = disposition.split('=')[-1]
+ if filename[0] in ('\'', '"'):
+ filename = filename[1:]
+ if filename[-1] in ('\'', '"'):
+ filename = filename[:-1]
+ filename = urllib2_unquote(filename)
+ break
+ except Exception:
+ import traceback
+ traceback.print_exc()
+ return filename or last_part_name
+
+
+def get_download_filename(url, cookie_file=None):
+ '''
+ Get a local filename for a URL using the content disposition header
+ Returns empty string if an error occurs.
+ '''
+ from calibre import browser
+ from contextlib import closing
+
+ filename = ''
+
+ br = browser()
+ if cookie_file:
+ from mechanize import MozillaCookieJar
+ cj = MozillaCookieJar()
+ cj.load(cookie_file)
+ br.set_cookiejar(cj)
+
+ try:
+ with closing(br.open(url)) as r:
+ filename = get_download_filename_from_response(r)
+ except:
+ import traceback
+ traceback.print_exc()
+
+ return filename
+