Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-07 18:24:30 -04:00)
Allow specifying timeout to read_url()
This commit is contained in:
parent dc92c8f9bb
commit e46982f7df
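In short, the commit threads an optional timeout (defaulting to 60 seconds) from the Kobo store plugin's read_url() wrapper through calibre.scraper.simple.read_url() and down into the Overseer worker protocol. A minimal sketch of the new call shape at the scraper level (the URL and storage list are illustrative):

    from calibre.scraper.simple import read_url

    storage = []  # holds the lazily created Overseer instance
    raw = read_url(storage, 'https://www.kobobooks.com/', timeout=30)
    print(raw[:100])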
@@ -23,11 +23,11 @@ from calibre.gui2.store.search_result import SearchResult
 from calibre.gui2.store.web_store_dialog import WebStoreDialog
 
 
-def read_url(url):
+def read_url(url, timeout=60):
     # Kobo uses Akamai which has some bot detection that uses network/tls
     # protocol data. So use the Chromium network stack to make the request
     from calibre.scraper.simple import read_url as ru
-    return ru(read_url.storage, url)
+    return ru(read_url.storage, url, timeout=timeout)
 
 
 read_url.storage = []
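The wrapper keeps its scraper state in a function attribute (read_url.storage) rather than a module-level global, so the one-slot list travels with the function object itself. A hedged, self-contained sketch of the same pattern, with a plain object() standing in for the expensive Overseer backend:

    def cached_fetch(url, timeout=60):
        # the list attribute is created once at import time; the (pretend)
        # expensive backend is built lazily on first call and reused afterwards
        if not cached_fetch.storage:
            cached_fetch.storage.append(object())  # stand-in for an Overseer
        backend = cached_fetch.storage[0]
        return f'fetched {url} via {backend!r} (timeout={timeout})'

    cached_fetch.storage = []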
@@ -36,7 +36,7 @@ read_url.storage = []
 def search_kobo(query, max_results=10, timeout=60, write_html_to=None):
     from css_selectors import Select
     url = 'https://www.kobobooks.com/search/search.html?q=' + quote_plus(query)
-    raw = read_url(url)
+    raw = read_url(url, timeout=timeout)
     if write_html_to is not None:
         with open(write_html_to, 'w') as f:
             f.write(raw)
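search_kobo() builds the query URL with quote_plus(), fetches it with the shared timeout, and can optionally dump the raw page for debugging via write_html_to. A hedged usage sketch, assuming the function yields SearchResult objects as the calibre store plugins conventionally do:

    results = search_kobo('dickens', max_results=5, timeout=30,
                          write_html_to='/tmp/kobo-search.html')
    for result in results:
        print(result)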
@@ -119,7 +119,7 @@ class KoboStore(BasicStoreConfig, StorePlugin):
             yield result
 
     def get_details(self, search_result, timeout):
-        raw = read_url(search_result.detail_item)
+        raw = read_url(search_result.detail_item, timeout=timeout)
         idata = html.fromstring(raw)
         if idata.xpath('boolean(//div[@class="bookitem-secondary-metadata"]//li[contains(text(), "Download options")])'):
             if idata.xpath('boolean(//div[@class="bookitem-secondary-metadata"]//li[contains(text(), "DRM-Free")])'):
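get_details() relies on XPath's boolean() so that xpath() returns a plain True/False instead of a node list. A small self-contained lxml example of the same test (the markup is made up):

    from lxml import html

    doc = html.fromstring(
        '<div class="bookitem-secondary-metadata"><ul><li>DRM-Free</li></ul></div>')
    # boolean(...) makes xpath() return a Python bool, not a list of nodes
    print(doc.xpath('boolean(//li[contains(text(), "DRM-Free")])'))  # True

The remaining hunks are in calibre.scraper.simple, the Chromium-backed fetcher the plugin delegates to.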
@@ -37,7 +37,8 @@ qt.webenginecontext.info=false
             raise SystemExit(int(rest))
         if cmd == b'FETCH':
             try:
-                html = s.fetch(QUrl.fromEncoded(json.loads(rest).encode('utf-8')))
+                d = json.loads(rest)
+                html = s.fetch(QUrl.fromEncoded(d['url'].encode('utf-8')), timeout=float(d['timeout']))
             except Exception as e:
                 import traceback
                 result = {'ok': False, 'tb': traceback.format_exc(), 'err': str(e)}
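On the worker side, the FETCH payload changes from a JSON-encoded URL string to a JSON object carrying both url and timeout. A minimal sketch of the new parsing step (the helper name is illustrative):

    import json

    def parse_fetch_payload(rest: bytes):
        # after this change the payload is an object, not a bare string
        d = json.loads(rest)
        return d['url'], float(d['timeout'])

    print(parse_fetch_payload(b'{"url": "https://example.com", "timeout": 60}'))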
@@ -67,12 +68,12 @@ class Overseer:
             ans = self.workers[wname] = w
         return ans
 
-    def fetch_url(self, url_or_qurl, source=''):
+    def fetch_url(self, url_or_qurl, source='', timeout=60):
         w = self.worker_for_source(source)
         if isinstance(url_or_qurl, str):
             url_or_qurl = QUrl(url_or_qurl)
         w.stdin.write(b'FETCH:')
-        w.stdin.write(json.dumps(bytes(url_or_qurl.toEncoded()).decode('utf-8')).encode('utf-8'))
+        w.stdin.write(json.dumps({'url': bytes(url_or_qurl.toEncoded()).decode('utf-8'), 'timeout': timeout}).encode('utf-8'))
         w.stdin.write(b'\n')
         w.stdin.flush()
         output = json.loads(w.stdout.readline())
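Overseer.fetch_url() frames each request as a single line on the worker's stdin: the FETCH: prefix, a JSON payload, then a newline, with the reply read back as one JSON line from stdout. A sketch of the client-side framing (the helper is illustrative):

    import json

    def frame_fetch_command(url: str, timeout: float = 60) -> bytes:
        # one request per line keeps the pipe protocol trivially parseable
        payload = {'url': url, 'timeout': timeout}
        return b'FETCH:' + json.dumps(payload).encode('utf-8') + b'\n'

    print(frame_fetch_command('https://example.com', 30))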
@@ -117,13 +118,13 @@ def cleanup_overseers():
 read_url_lock = Lock()
 
 
-def read_url(storage, url):
+def read_url(storage, url, timeout=60):
     with read_url_lock:
         if not storage:
             storage.append(Overseer())
         scraper = storage[0]
     from calibre.ebooks.chardet import strip_encoding_declarations
-    return strip_encoding_declarations(scraper.fetch_url(url))
+    return strip_encoding_declarations(scraper.fetch_url(url, timeout=timeout))
 
 
 def find_tests():
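At the scraper level, read_url() uses a lock plus a caller-supplied one-slot list to create the Overseer at most once across threads. The same lazy-singleton shape in isolation, as a runnable sketch:

    from threading import Lock

    _lock = Lock()

    def get_or_create(storage, factory):
        # first caller pays the construction cost; everyone else reuses slot 0
        with _lock:
            if not storage:
                storage.append(factory())
            return storage[0]

    slot = []
    print(get_or_create(slot, object) is get_or_create(slot, object))  # True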