mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge branch 'py3' of https://github.com/eli-schwartz/calibre
This commit is contained in:
commit
bfbc31fa9f
@ -103,7 +103,7 @@ def save_soup(soup, target):
|
|||||||
f.write(html.encode('utf-8'))
|
f.write(html.encode('utf-8'))
|
||||||
|
|
||||||
|
|
||||||
class response(str):
|
class response(bytes):
|
||||||
|
|
||||||
def __new__(cls, *args):
|
def __new__(cls, *args):
|
||||||
obj = super(response, cls).__new__(cls, *args)
|
obj = super(response, cls).__new__(cls, *args)
|
||||||
@ -261,16 +261,14 @@ class RecursiveFetcher(object):
|
|||||||
delta = time.time() - self.last_fetch_at
|
delta = time.time() - self.last_fetch_at
|
||||||
if delta < self.delay:
|
if delta < self.delay:
|
||||||
time.sleep(self.delay - delta)
|
time.sleep(self.delay - delta)
|
||||||
if isinstance(url, unicode_type):
|
# mechanize does not handle quoting automatically
|
||||||
url = url.encode('utf-8')
|
|
||||||
# Not sure is this is really needed as I think mechanize
|
|
||||||
# handles quoting automatically, but leaving it
|
|
||||||
# in case it breaks something
|
|
||||||
if re.search(r'\s+', url) is not None:
|
if re.search(r'\s+', url) is not None:
|
||||||
|
if isinstance(url, unicode_type):
|
||||||
|
url = url.encode('utf-8')
|
||||||
purl = list(urlparse(url))
|
purl = list(urlparse(url))
|
||||||
for i in range(2, 6):
|
for i in range(2, 6):
|
||||||
purl[i] = quote(purl[i])
|
purl[i] = quote(purl[i])
|
||||||
url = urlunparse(purl)
|
url = urlunparse(purl).decode('utf-8')
|
||||||
open_func = getattr(self.browser, 'open_novisit', self.browser.open)
|
open_func = getattr(self.browser, 'open_novisit', self.browser.open)
|
||||||
try:
|
try:
|
||||||
with closing(open_func(url, timeout=self.timeout)) as f:
|
with closing(open_func(url, timeout=self.timeout)) as f:
|
||||||
@ -414,8 +412,6 @@ class RecursiveFetcher(object):
|
|||||||
continue
|
continue
|
||||||
c += 1
|
c += 1
|
||||||
fname = ascii_filename('img'+str(c))
|
fname = ascii_filename('img'+str(c))
|
||||||
if isinstance(fname, unicode_type):
|
|
||||||
fname = fname.encode('ascii', 'replace')
|
|
||||||
data = self.preprocess_image_ext(data, iurl) if self.preprocess_image_ext is not None else data
|
data = self.preprocess_image_ext(data, iurl) if self.preprocess_image_ext is not None else data
|
||||||
if data is None:
|
if data is None:
|
||||||
continue
|
continue
|
||||||
@ -520,7 +516,7 @@ class RecursiveFetcher(object):
|
|||||||
dsrc = self.fetch_url(iurl)
|
dsrc = self.fetch_url(iurl)
|
||||||
newbaseurl = dsrc.newurl
|
newbaseurl = dsrc.newurl
|
||||||
if len(dsrc) == 0 or \
|
if len(dsrc) == 0 or \
|
||||||
len(re.compile('<!--.*?-->', re.DOTALL).sub('', dsrc).strip()) == 0:
|
len(re.compile(b'<!--.*?-->', re.DOTALL).sub(b'', dsrc).strip()) == 0:
|
||||||
raise ValueError('No content at URL %r'%iurl)
|
raise ValueError('No content at URL %r'%iurl)
|
||||||
if callable(self.encoding):
|
if callable(self.encoding):
|
||||||
dsrc = self.encoding(dsrc)
|
dsrc = self.encoding(dsrc)
|
||||||
@ -544,7 +540,7 @@ class RecursiveFetcher(object):
|
|||||||
_fname = basename(iurl)
|
_fname = basename(iurl)
|
||||||
if not isinstance(_fname, unicode_type):
|
if not isinstance(_fname, unicode_type):
|
||||||
_fname.decode('latin1', 'replace')
|
_fname.decode('latin1', 'replace')
|
||||||
_fname = _fname.encode('ascii', 'replace').replace('%', '').replace(os.sep, '')
|
_fname = _fname.replace('%', '').replace(os.sep, '')
|
||||||
_fname = ascii_filename(_fname)
|
_fname = ascii_filename(_fname)
|
||||||
_fname = os.path.splitext(_fname)[0][:120] + '.xhtml'
|
_fname = os.path.splitext(_fname)[0][:120] + '.xhtml'
|
||||||
res = os.path.join(linkdiskpath, _fname)
|
res = os.path.join(linkdiskpath, _fname)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user