mirror of https://github.com/kovidgoyal/calibre.git

commit cf73771c87
parent 1027cf8a1e

Improved logging for news downloads, log performance numbers
src/calibre/utils/logging.py

@@ -177,6 +177,17 @@ class ThreadSafeLog(Log):
         with self._lock:
             Log.prints(self, *args, **kwargs)
 
+class ThreadSafeWrapper(Log):
+
+    def __init__(self, other_log):
+        Log.__init__(self, level=other_log.filter_level)
+        self.outputs = list(other_log.outputs)
+        self._lock = RLock()
+
+    def prints(self, *args, **kwargs):
+        with self._lock:
+            Log.prints(self, *args, **kwargs)
+
 class GUILog(ThreadSafeLog):
 
     '''
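
A minimal standalone sketch of the locking pattern ThreadSafeWrapper introduces (the names below are illustrative, not calibre code): guarding every prints call with a single RLock keeps output lines from concurrent threads from interleaving.

import sys
from threading import RLock, Thread

class SerializedLog(object):  # hypothetical stand-in for ThreadSafeWrapper

    def __init__(self):
        self._lock = RLock()

    def prints(self, *args):
        # Same shape as ThreadSafeWrapper.prints: take the lock, then write
        with self._lock:
            sys.stdout.write(' '.join(map(str, args)) + '\n')

log = SerializedLog()
threads = [Thread(target=log.prints, args=('worker', n, 'done')) for n in range(4)]
for t in threads:
    t.start()
for t in threads:
    t.join()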
src/calibre/web/feeds/news.py

@@ -29,6 +29,7 @@ from calibre.ptempfile import PersistentTemporaryFile
 from calibre.utils.date import now as nowf
 from calibre.utils.magick.draw import save_cover_data_to, add_borders_to_image
 from calibre.utils.localization import canonicalize_lang
+from calibre.utils.logging import ThreadSafeWrapper
 
 class LoginFailed(ValueError):
     pass
@@ -841,7 +842,7 @@ class BasicNewsRecipe(Recipe):
         :param parser: Command line option parser. Used to intelligently merge options.
         :param progress_reporter: A Callable that takes two arguments: progress (a number between 0 and 1) and a string message. The message should be optional.
         '''
-        self.log = log
+        self.log = ThreadSafeWrapper(log)
         if not isinstance(self.title, unicode):
             self.title = unicode(self.title, 'utf-8', 'replace')
 
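
This wrap is where the new class earns its keep: a recipe downloads articles on several worker threads that all share self.log, so the caller's Log is copied into a lock-guarded wrapper instead of being used directly. A hedged usage illustration, assuming Log's usual level helpers such as info():

from calibre.utils.logging import Log, ThreadSafeWrapper

log = Log()
safe = ThreadSafeWrapper(log)  # copies outputs and filter level, adds an RLock
safe.info('safe to call from any download thread')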
src/calibre/web/fetch/simple.py

@@ -156,7 +156,8 @@ class RecursiveFetcher(object):
     def get_soup(self, src, url=None):
         nmassage = copy.copy(BeautifulSoup.MARKUP_MASSAGE)
         nmassage.extend(self.preprocess_regexps)
-        nmassage += [(re.compile(r'<!DOCTYPE .+?>', re.DOTALL), lambda m: '')] # Some websites have buggy doctype declarations that mess up beautifulsoup
+        # Some websites have buggy doctype declarations that mess up beautifulsoup
+        nmassage += [(re.compile(r'<!DOCTYPE .+?>', re.DOTALL|re.IGNORECASE), lambda m: '')]
         # Remove comments as they can leave detritus when extracting tags leaves
         # multiple nested comments
         nmassage.append((re.compile(r'<!--.*?-->', re.DOTALL), lambda m: ''))
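
Beyond moving the comment onto its own line, the behavioral fix in this hunk is the added re.IGNORECASE: a lowercase <!doctype html> previously survived the cleanup. A quick standalone check:

import re

old_pat = re.compile(r'<!DOCTYPE .+?>', re.DOTALL)
new_pat = re.compile(r'<!DOCTYPE .+?>', re.DOTALL | re.IGNORECASE)

src = '<!doctype html><html><body>hi</body></html>'
print(old_pat.sub('', src))  # doctype left in place
print(new_pat.sub('', src))  # doctype stripped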
@@ -207,6 +208,7 @@ class RecursiveFetcher(object):
     def fetch_url(self, url):
         data = None
         self.log.debug('Fetching', url)
+        st = time.time()
 
         # Check for a URL pointing to the local filesystem and special case it
         # for efficiency and robustness. Bypasses delay checking as it does not
@@ -225,6 +227,7 @@ class RecursiveFetcher(object):
                 data = response(f.read())
                 data.newurl = 'file:'+url # This is what mechanize does for
                                           # local URLs
+            self.log.debug('Fetched %s in %f seconds' % (url, time.time() - st))
             return data
 
         delta = time.time() - self.last_fetch_at
@@ -260,11 +263,11 @@ class RecursiveFetcher(object):
                 raise err
             finally:
                 self.last_fetch_at = time.time()
+        self.log.debug('Fetched %s in %f seconds' % (url, time.time() - st))
         return data
 
     def start_fetch(self, url):
         soup = BeautifulSoup(u'<a href="'+url+'" />')
-        self.log.debug('Downloading')
         res = self.process_links(soup, url, 0, into_dir='')
         self.log.debug(url, 'saved to', res)
         return res
@@ -526,7 +529,9 @@ class RecursiveFetcher(object):
                 else:
                     dsrc = xml_to_unicode(dsrc, self.verbose)[0]
 
+                st = time.time()
                 soup = self.get_soup(dsrc, url=iurl)
+                self.log.debug('Parsed %s in %f seconds' % (iurl, time.time() - st))
 
                 base = soup.find('base', href=True)
                 if base is not None:
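
All the timing added in this commit follows one shape: capture st = time.time() before the expensive call, then log the delta after it. A hedged sketch of that pattern factored into a context manager (not part of this commit; timed is an illustrative name):

import time
from contextlib import contextmanager

@contextmanager
def timed(log, verb, target):
    # Logs e.g. 'Parsed <url> in <n> seconds' on exit, mirroring the
    # 'Fetched %s in %f seconds' messages added above
    st = time.time()
    try:
        yield
    finally:
        log.debug('%s %s in %f seconds' % (verb, target, time.time() - st))

Usage would look like:

    with timed(self.log, 'Parsed', iurl):
        soup = self.get_soup(dsrc, url=iurl)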