mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Speed up news download by implementing multi-threaded downloading
This commit is contained in:
parent
ea2dfd7ce2
commit
802e8caef6
@ -24,6 +24,12 @@
|
|||||||
news sources.
|
news sources.
|
||||||
type: major
|
type: major
|
||||||
|
|
||||||
|
- title: "Speed up download of news"
|
||||||
|
description: >
|
||||||
|
Speed up download of news by allowing the download to happen in multiple threads
|
||||||
|
(default 5). This may break some recipes, so please report any breakage you notice.
|
||||||
|
type: major
|
||||||
|
|
||||||
- title: "FB2 Output: Support creation of TOC from <h1> tags"
|
- title: "FB2 Output: Support creation of TOC from <h1> tags"
|
||||||
|
|
||||||
- title: "E-book viewer: Make keyboard shortcuts customizable"
|
- title: "E-book viewer: Make keyboard shortcuts customizable"
|
||||||
@ -95,6 +101,7 @@
|
|||||||
- Soldiers Magazine
|
- Soldiers Magazine
|
||||||
- The Economist
|
- The Economist
|
||||||
- Arizona Daily Star
|
- Arizona Daily Star
|
||||||
|
- ESPN
|
||||||
|
|
||||||
|
|
||||||
- version: 0.6.26
|
- version: 0.6.26
|
||||||
|
@ -9,7 +9,6 @@ UTF-8 encoding with any charset declarations removed.
|
|||||||
'''
|
'''
|
||||||
import sys, socket, os, urlparse, re, time, copy, urllib2, threading, traceback
|
import sys, socket, os, urlparse, re, time, copy, urllib2, threading, traceback
|
||||||
from urllib import url2pathname, quote
|
from urllib import url2pathname, quote
|
||||||
from threading import RLock
|
|
||||||
from httplib import responses
|
from httplib import responses
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
from cStringIO import StringIO
|
from cStringIO import StringIO
|
||||||
@ -39,7 +38,6 @@ class closing(object):
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
_browser_lock = RLock()
|
|
||||||
|
|
||||||
bad_url_counter = 0
|
bad_url_counter = 0
|
||||||
def basename(url):
|
def basename(url):
|
||||||
@ -125,7 +123,6 @@ class RecursiveFetcher(object):
|
|||||||
self.imagemap_lock = threading.RLock()
|
self.imagemap_lock = threading.RLock()
|
||||||
self.stylemap = css_map
|
self.stylemap = css_map
|
||||||
self.image_url_processor = None
|
self.image_url_processor = None
|
||||||
self.browser_lock = _browser_lock
|
|
||||||
self.stylemap_lock = threading.RLock()
|
self.stylemap_lock = threading.RLock()
|
||||||
self.downloaded_paths = []
|
self.downloaded_paths = []
|
||||||
self.current_dir = self.base_dir
|
self.current_dir = self.base_dir
|
||||||
@ -196,26 +193,25 @@ class RecursiveFetcher(object):
|
|||||||
for i in range(2, 6):
|
for i in range(2, 6):
|
||||||
purl[i] = quote(purl[i])
|
purl[i] = quote(purl[i])
|
||||||
url = urlparse.urlunparse(purl)
|
url = urlparse.urlunparse(purl)
|
||||||
with self.browser_lock:
|
try:
|
||||||
try:
|
with closing(self.browser.open_novisit(url, timeout=self.timeout)) as f:
|
||||||
|
data = response(f.read()+f.read())
|
||||||
|
data.newurl = f.geturl()
|
||||||
|
except urllib2.URLError, err:
|
||||||
|
if hasattr(err, 'code') and responses.has_key(err.code):
|
||||||
|
raise FetchError, responses[err.code]
|
||||||
|
if getattr(err, 'reason', [0])[0] == 104 or \
|
||||||
|
getattr(getattr(err, 'args', [None])[0], 'errno', None) == -2: # Connection reset by peer or Name or service not know
|
||||||
|
self.log.debug('Temporary error, retrying in 1 second')
|
||||||
|
time.sleep(1)
|
||||||
with closing(self.browser.open(url, timeout=self.timeout)) as f:
|
with closing(self.browser.open(url, timeout=self.timeout)) as f:
|
||||||
data = response(f.read()+f.read())
|
data = response(f.read()+f.read())
|
||||||
data.newurl = f.geturl()
|
data.newurl = f.geturl()
|
||||||
except urllib2.URLError, err:
|
else:
|
||||||
if hasattr(err, 'code') and responses.has_key(err.code):
|
raise err
|
||||||
raise FetchError, responses[err.code]
|
finally:
|
||||||
if getattr(err, 'reason', [0])[0] == 104 or \
|
self.last_fetch_at = time.time()
|
||||||
getattr(getattr(err, 'args', [None])[0], 'errno', None) == -2: # Connection reset by peer or Name or service not know
|
return data
|
||||||
self.log.debug('Temporary error, retrying in 1 second')
|
|
||||||
time.sleep(1)
|
|
||||||
with closing(self.browser.open(url, timeout=self.timeout)) as f:
|
|
||||||
data = response(f.read()+f.read())
|
|
||||||
data.newurl = f.geturl()
|
|
||||||
else:
|
|
||||||
raise err
|
|
||||||
finally:
|
|
||||||
self.last_fetch_at = time.time()
|
|
||||||
return data
|
|
||||||
|
|
||||||
|
|
||||||
def start_fetch(self, url):
|
def start_fetch(self, url):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user