mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-08-11 09:13:57 -04:00
Fix multithreading issues in feeds2disk. This should make news fetching much more robust, though a little slower
This commit is contained in:
parent
dbe52fd1b3
commit
6b9b37215d
@ -9,8 +9,8 @@ UTF-8 encoding with any charset declarations removed.
|
||||
'''
|
||||
import sys, socket, os, urlparse, logging, re, time, copy, urllib2, threading, traceback
|
||||
from urllib import url2pathname
|
||||
from threading import RLock
|
||||
from httplib import responses
|
||||
from contextlib import closing
|
||||
|
||||
from calibre import setup_cli_handlers, browser, sanitize_file_name, \
|
||||
relpath, LoggingInterface
|
||||
@ -21,6 +21,23 @@ from calibre.utils.config import OptionParser
|
||||
class FetchError(Exception):
    'Raised when a remote resource cannot be fetched.'
|
||||
|
||||
class closing(object):
    '''Ensure that an object is closed when a ``with`` block ends.

    ``__enter__`` returns the wrapped object so it can be bound with
    ``as``.  ``__exit__`` calls ``close()`` on it and deliberately
    ignores any error raised while closing (best-effort cleanup);
    exceptions raised inside the block itself are never suppressed.
    '''

    def __init__(self, thing):
        self.thing = thing  # object whose close() runs at block exit

    def __enter__(self):
        return self.thing

    def __exit__(self, exc_type, exc_val, exc_tb):
        try:
            self.thing.close()
        except Exception:
            # A failed (or missing) close() must not mask an in-flight
            # exception or abort the caller.
            pass
|
||||
|
||||
_browser_lock = RLock()
|
||||
|
||||
def basename(url):
|
||||
parts = urlparse.urlsplit(url)
|
||||
path = url2pathname(parts.path)
|
||||
@ -145,19 +162,10 @@ class RecursiveFetcher(object, LoggingInterface):
|
||||
delta = time.time() - self.last_fetch_at
|
||||
if delta < self.delay:
|
||||
time.sleep(delta)
|
||||
try:
|
||||
with _browser_lock:
|
||||
try:
|
||||
with closing(self.browser.open(url)) as f:
|
||||
data = response(f.read())
|
||||
data.newurl = f.geturl()
|
||||
except AttributeError:
|
||||
time.sleep(2)
|
||||
try:
|
||||
with closing(self.browser.open(url)) as f:
|
||||
data = response(f.read())
|
||||
data.newurl = f.geturl()
|
||||
except AttributeError:
|
||||
data = response(urllib2.urlopen(url).read())
|
||||
data = response(f.read()+f.read())
|
||||
data.newurl = f.geturl()
|
||||
except urllib2.URLError, err:
|
||||
if hasattr(err, 'code') and responses.has_key(err.code):
|
||||
@ -165,10 +173,9 @@ class RecursiveFetcher(object, LoggingInterface):
|
||||
if getattr(err, 'reason', [0])[0] == 104: # Connection reset by peer
|
||||
self.log_debug('Connection reset by peer retrying in 1 second.')
|
||||
time.sleep(1)
|
||||
if hasattr(f, 'close'):
|
||||
f.close()
|
||||
with closing(self.browser.open(url)) as f:
|
||||
data = f.read()
|
||||
data = response(f.read()+f.read())
|
||||
data.newurl = f.geturl()
|
||||
else:
|
||||
raise err
|
||||
finally:
|
||||
|
Loading…
x
Reference in New Issue
Block a user