mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Speed up news download by implementing multi-threaded downloading
This commit is contained in:
parent
ea2dfd7ce2
commit
802e8caef6
@ -24,6 +24,12 @@
|
||||
news sources.
|
||||
type: major
|
||||
|
||||
- title: "Speed up download of news"
|
||||
description: >
|
||||
Speed up download of news by allowing the download to happen in multiple threads
|
||||
(default 5). This may break some recipes, so please report any breakage you notice.
|
||||
type: major
|
||||
|
||||
- title: "FB2 Output: Support creation of TOC from <h1> tags"
|
||||
|
||||
- title: "E-book viewer: Make keyboard shortcuts customizable"
|
||||
@ -95,6 +101,7 @@
|
||||
- Soldiers Magazine
|
||||
- The Economist
|
||||
- Arizona Daily Star
|
||||
- ESPN
|
||||
|
||||
|
||||
- version: 0.6.26
|
||||
|
@ -9,7 +9,6 @@ UTF-8 encoding with any charset declarations removed.
|
||||
'''
|
||||
import sys, socket, os, urlparse, re, time, copy, urllib2, threading, traceback
|
||||
from urllib import url2pathname, quote
|
||||
from threading import RLock
|
||||
from httplib import responses
|
||||
from PIL import Image
|
||||
from cStringIO import StringIO
|
||||
@ -39,7 +38,6 @@ class closing(object):
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
_browser_lock = RLock()
|
||||
|
||||
bad_url_counter = 0
|
||||
def basename(url):
|
||||
@ -125,7 +123,6 @@ class RecursiveFetcher(object):
|
||||
self.imagemap_lock = threading.RLock()
|
||||
self.stylemap = css_map
|
||||
self.image_url_processor = None
|
||||
self.browser_lock = _browser_lock
|
||||
self.stylemap_lock = threading.RLock()
|
||||
self.downloaded_paths = []
|
||||
self.current_dir = self.base_dir
|
||||
@ -196,26 +193,25 @@ class RecursiveFetcher(object):
|
||||
for i in range(2, 6):
|
||||
purl[i] = quote(purl[i])
|
||||
url = urlparse.urlunparse(purl)
|
||||
with self.browser_lock:
|
||||
try:
|
||||
try:
|
||||
with closing(self.browser.open_novisit(url, timeout=self.timeout)) as f:
|
||||
data = response(f.read()+f.read())
|
||||
data.newurl = f.geturl()
|
||||
except urllib2.URLError, err:
|
||||
if hasattr(err, 'code') and responses.has_key(err.code):
|
||||
raise FetchError, responses[err.code]
|
||||
if getattr(err, 'reason', [0])[0] == 104 or \
|
||||
getattr(getattr(err, 'args', [None])[0], 'errno', None) == -2: # Connection reset by peer or Name or service not know
|
||||
self.log.debug('Temporary error, retrying in 1 second')
|
||||
time.sleep(1)
|
||||
with closing(self.browser.open(url, timeout=self.timeout)) as f:
|
||||
data = response(f.read()+f.read())
|
||||
data.newurl = f.geturl()
|
||||
except urllib2.URLError, err:
|
||||
if hasattr(err, 'code') and responses.has_key(err.code):
|
||||
raise FetchError, responses[err.code]
|
||||
if getattr(err, 'reason', [0])[0] == 104 or \
|
||||
getattr(getattr(err, 'args', [None])[0], 'errno', None) == -2: # Connection reset by peer or Name or service not know
|
||||
self.log.debug('Temporary error, retrying in 1 second')
|
||||
time.sleep(1)
|
||||
with closing(self.browser.open(url, timeout=self.timeout)) as f:
|
||||
data = response(f.read()+f.read())
|
||||
data.newurl = f.geturl()
|
||||
else:
|
||||
raise err
|
||||
finally:
|
||||
self.last_fetch_at = time.time()
|
||||
return data
|
||||
else:
|
||||
raise err
|
||||
finally:
|
||||
self.last_fetch_at = time.time()
|
||||
return data
|
||||
|
||||
|
||||
def start_fetch(self, url):
|
||||
|
Loading…
x
Reference in New Issue
Block a user