mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 18:24:30 -04:00
Allow specifying timeout to read_url()
This commit is contained in:
parent
dc92c8f9bb
commit
e46982f7df
@ -23,11 +23,11 @@ from calibre.gui2.store.search_result import SearchResult
|
||||
from calibre.gui2.store.web_store_dialog import WebStoreDialog
|
||||
|
||||
|
||||
def read_url(url):
|
||||
def read_url(url, timeout=60):
|
||||
# Kobo uses Akamai which has some bot detection that uses network/tls
|
||||
# protocol data. So use the Chromium network stack to make the request
|
||||
from calibre.scraper.simple import read_url as ru
|
||||
return ru(read_url.storage, url)
|
||||
return ru(read_url.storage, url, timeout=timeout)
|
||||
|
||||
|
||||
read_url.storage = []
|
||||
@ -36,7 +36,7 @@ read_url.storage = []
|
||||
def search_kobo(query, max_results=10, timeout=60, write_html_to=None):
|
||||
from css_selectors import Select
|
||||
url = 'https://www.kobobooks.com/search/search.html?q=' + quote_plus(query)
|
||||
raw = read_url(url)
|
||||
raw = read_url(url, timeout=timeout)
|
||||
if write_html_to is not None:
|
||||
with open(write_html_to, 'w') as f:
|
||||
f.write(raw)
|
||||
@ -119,7 +119,7 @@ class KoboStore(BasicStoreConfig, StorePlugin):
|
||||
yield result
|
||||
|
||||
def get_details(self, search_result, timeout):
|
||||
raw = read_url(search_result.detail_item)
|
||||
raw = read_url(search_result.detail_item, timeout=timeout)
|
||||
idata = html.fromstring(raw)
|
||||
if idata.xpath('boolean(//div[@class="bookitem-secondary-metadata"]//li[contains(text(), "Download options")])'):
|
||||
if idata.xpath('boolean(//div[@class="bookitem-secondary-metadata"]//li[contains(text(), "DRM-Free")])'):
|
||||
|
@ -37,7 +37,8 @@ qt.webenginecontext.info=false
|
||||
raise SystemExit(int(rest))
|
||||
if cmd == b'FETCH':
|
||||
try:
|
||||
html = s.fetch(QUrl.fromEncoded(json.loads(rest).encode('utf-8')))
|
||||
d = json.loads(rest)
|
||||
html = s.fetch(QUrl.fromEncoded(d['url'].encode('utf-8')), timeout=float(d['timeout']))
|
||||
except Exception as e:
|
||||
import traceback
|
||||
result = {'ok': False, 'tb': traceback.format_exc(), 'err': str(e)}
|
||||
@ -67,12 +68,12 @@ class Overseer:
|
||||
ans = self.workers[wname] = w
|
||||
return ans
|
||||
|
||||
def fetch_url(self, url_or_qurl, source=''):
|
||||
def fetch_url(self, url_or_qurl, source='', timeout=60):
|
||||
w = self.worker_for_source(source)
|
||||
if isinstance(url_or_qurl, str):
|
||||
url_or_qurl = QUrl(url_or_qurl)
|
||||
w.stdin.write(b'FETCH:')
|
||||
w.stdin.write(json.dumps(bytes(url_or_qurl.toEncoded()).decode('utf-8')).encode('utf-8'))
|
||||
w.stdin.write(json.dumps({'url': bytes(url_or_qurl.toEncoded()).decode('utf-8'), 'timeout': timeout}).encode('utf-8'))
|
||||
w.stdin.write(b'\n')
|
||||
w.stdin.flush()
|
||||
output = json.loads(w.stdout.readline())
|
||||
@ -117,13 +118,13 @@ def cleanup_overseers():
|
||||
read_url_lock = Lock()
|
||||
|
||||
|
||||
def read_url(storage, url):
|
||||
def read_url(storage, url, timeout=60):
|
||||
with read_url_lock:
|
||||
if not storage:
|
||||
storage.append(Overseer())
|
||||
scraper = storage[0]
|
||||
from calibre.ebooks.chardet import strip_encoding_declarations
|
||||
return strip_encoding_declarations(scraper.fetch_url(url))
|
||||
return strip_encoding_declarations(scraper.fetch_url(url, timeout=timeout))
|
||||
|
||||
|
||||
def find_tests():
|
||||
|
Loading…
x
Reference in New Issue
Block a user