diff --git a/src/calibre/constants.py b/src/calibre/constants.py index ce68d8cfc4..64577cc56e 100644 --- a/src/calibre/constants.py +++ b/src/calibre/constants.py @@ -8,7 +8,7 @@ __author__ = "Kovid Goyal " Various run time constants. ''' -import sys, locale, codecs +import sys, locale, codecs, os from calibre.utils.terminfo import TerminalController terminal_controller = TerminalController(sys.stdout) @@ -28,3 +28,36 @@ win32event = __import__('win32event') if iswindows else None winerror = __import__('winerror') if iswindows else None win32api = __import__('win32api') if iswindows else None fcntl = None if iswindows else __import__('fcntl') + +################################################################################ +plugins = None +if plugins is None: + # Load plugins + def load_plugins(): + plugins = {} + if isfrozen: + if iswindows: + plugin_path = os.path.join(os.path.dirname(sys.executable), 'plugins') + sys.path.insert(1, os.path.dirname(sys.executable)) + elif isosx: + plugin_path = os.path.join(getattr(sys, 'frameworks_dir'), 'plugins') + elif islinux: + plugin_path = os.path.join(getattr(sys, 'frozen_path'), 'plugins') + sys.path.insert(0, plugin_path) + else: + import pkg_resources + plugin_path = getattr(pkg_resources, 'resource_filename')('calibre', 'plugins') + sys.path.insert(0, plugin_path) + + for plugin in ['pictureflow', 'lzx', 'msdes'] + \ + (['winutil'] if iswindows else []) + \ + (['usbobserver'] if isosx else []): + try: + p, err = __import__(plugin), '' + except Exception, err: + p = None + err = str(err) + plugins[plugin] = (p, err) + return plugins + + plugins = load_plugins() \ No newline at end of file diff --git a/src/calibre/ebooks/lrf/feeds/convert_from.py b/src/calibre/ebooks/lrf/feeds/convert_from.py index b401d6572d..dd1e21aa98 100644 --- a/src/calibre/ebooks/lrf/feeds/convert_from.py +++ b/src/calibre/ebooks/lrf/feeds/convert_from.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +from __future__ import with_statement __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' ''' @@ -8,8 +8,9 @@ from calibre.ebooks.lrf import option_parser as lrf_option_parser from calibre.ebooks.lrf.html.convert_from import process_file from calibre.web.feeds.main import option_parser as feeds_option_parser from calibre.web.feeds.main import run_recipe -from calibre.ptempfile import PersistentTemporaryDirectory +from calibre.ptempfile import TemporaryDirectory from calibre import sanitize_file_name, strftime +from calibre.ebooks import ConversionError import sys, os @@ -34,25 +35,27 @@ def main(args=sys.argv, notification=None, handler=None): recipe_arg = args[1] if len(args) > 1 else None - tdir = PersistentTemporaryDirectory('_feeds2lrf') - opts.output_dir = tdir - - recipe = run_recipe(opts, recipe_arg, parser, notification=notification, handler=handler) - - htmlfile = os.path.join(tdir, 'index.html') - if not os.access(htmlfile, os.R_OK): - raise RuntimeError(_('Fetching of recipe failed: ')+recipe_arg) - - lparser = lrf_option_parser('') - ropts = lparser.parse_args(['html2lrf']+recipe.html2lrf_options)[0] - parser.merge_options(ropts, opts) - - if not opts.output: - ext = '.lrs' if opts.lrs else '.lrf' - fname = recipe.title + strftime(recipe.timefmt)+ext - opts.output = os.path.join(os.getcwd(), sanitize_file_name(fname)) - print 'Generating LRF...' - process_file(htmlfile, opts) + with TemporaryDirectory('_feeds2lrf') as tdir: + opts.output_dir = tdir + + recipe = run_recipe(opts, recipe_arg, parser, notification=notification, handler=handler) + + htmlfile = os.path.join(tdir, 'index.html') + if not os.access(htmlfile, os.R_OK): + raise RuntimeError(_('Fetching of recipe failed: ')+recipe_arg) + + lparser = lrf_option_parser('') + ropts = lparser.parse_args(['html2lrf']+recipe.html2lrf_options)[0] + parser.merge_options(ropts, opts) + + if not opts.output: + ext = '.lrs' if opts.lrs else '.lrf' + fname = recipe.title + strftime(recipe.timefmt)+ext + opts.output = os.path.join(os.getcwd(), sanitize_file_name(fname)) + print 'Generating LRF...' + process_file(htmlfile, opts) + if os.stat(opts.output).st_size < 100: # This can happen if the OS runs out of file handles + raise ConversionError(_('Failed to convert downloaded recipe: ')+recipe_arg) return 0 if __name__ == '__main__': diff --git a/src/calibre/ebooks/metadata/opf2.py b/src/calibre/ebooks/metadata/opf2.py index 5a3a74fd89..51b5035290 100644 --- a/src/calibre/ebooks/metadata/opf2.py +++ b/src/calibre/ebooks/metadata/opf2.py @@ -15,15 +15,24 @@ from calibre.ebooks.chardet import xml_to_unicode class MetadataField(object): - def __init__(self, name, is_dc=True): + def __init__(self, name, is_dc=True, formatter=None): self.name = name self.is_dc = is_dc + self.formatter = formatter def __get__(self, obj, type=None): ans = obj.get_metadata_element(self.name) if ans is None: - return u'' - return obj.get_text(ans) + return None + ans = obj.get_text(ans) + if ans is None: + return ans + if self.formatter is not None: + try: + ans = self.formatter(ans) + except: + return None + return ans def __set__(self, obj, val): elem = obj.get_metadata_element(self.name) @@ -60,8 +69,8 @@ class OPF(object): comments = MetadataField('description') category = MetadataField('category') series = MetadataField('series', is_dc=False) - series_index = MetadataField('series_index', is_dc=False) - rating = MetadataField('rating', is_dc=False) + series_index = MetadataField('series_index', is_dc=False, formatter=int) + rating = MetadataField('rating', is_dc=False, formatter=int) def __init__(self, stream, basedir): @@ -194,12 +203,14 @@ class OPFTest(unittest.TestCase): self.assertEqual(opf.author_sort, 'Monkey') self.assertEqual(opf.tags, ['One', 'Two']) self.assertEqual(opf.isbn, '123456789') + self.assertEqual(opf.series, None) + self.assertEqual(opf.series_index, None) def testWriting(self): for test in [('title', 'New & Title'), ('authors', ['One', 'Two']), ('author_sort', "Kitchen"), ('tags', ['Three']), - ('isbn', 'a'), ('rating', '3')]: + ('isbn', 'a'), ('rating', 3)]: setattr(self.opf, *test) self.assertEqual(getattr(self.opf, test[0]), test[1]) diff --git a/src/calibre/gui2/main.py b/src/calibre/gui2/main.py index 6dfe69fcaa..2d20d05b7a 100644 --- a/src/calibre/gui2/main.py +++ b/src/calibre/gui2/main.py @@ -786,6 +786,10 @@ in which you want to store your books files. Any existing books will be automati if to_device: self.status_bar.showMessage(_('News fetched. Uploading to device.'), 2000) self.persistent_files.append(pt) + try: + os.remove(pt.name) + except: + pass ############################################################################ @@ -846,6 +850,7 @@ in which you want to store your books files. Any existing books will be automati of = PersistentTemporaryFile('.lrf') of.close() cover = self.library_view.model().db.cover(row) + cf = None if cover: cf = PersistentTemporaryFile('.jpeg') cf.write(cover) @@ -858,7 +863,7 @@ in which you want to store your books files. Any existing books will be automati description=_('Convert book %d of %d (%s)')%(i+1, len(rows), repr(mi.title))) - self.conversion_jobs[job] = (d.cover_file, pt, of, d.output_format, + self.conversion_jobs[job] = (cf, pt, of, d.output_format, self.library_view.model().db.id(row)) res = [] for row in bad_rows: @@ -882,12 +887,13 @@ in which you want to store your books files. Any existing books will be automati if mi.title: options.title = mi.title if mi.authors: - opts.author = ','.join(mi.authors) + options.author = ','.join(mi.authors) data = None for fmt in ['cbz', 'cbr']: try: data = self.library_view.model().db.format(row, fmt.upper()) - break + if data: + break except: continue @@ -933,7 +939,6 @@ in which you want to store your books files. Any existing books will be automati 'any2lrf', args=[cmdline], description=_('Convert book: ')+d.title()) - self.conversion_jobs[job] = (d.cover_file, pt, of, d.output_format, d.id) changed = True if changed: @@ -984,14 +989,22 @@ in which you want to store your books files. Any existing books will be automati self.library_view.model().research() def book_converted(self, job): - of, fmt, book_id = self.conversion_jobs.pop(job)[2:] - if job.exception is not None: - self.job_exception(job) - return - data = open(of.name, 'rb') - self.library_view.model().db.add_format(book_id, fmt, data, index_is_id=True) - data.close() - self.status_bar.showMessage(job.description + (' completed'), 2000) + cf, pt, of, fmt, book_id = self.conversion_jobs.pop(job) + try: + if job.exception is not None: + self.job_exception(job) + return + data = open(of.name, 'rb') + self.library_view.model().db.add_format(book_id, fmt, data, index_is_id=True) + data.close() + self.status_bar.showMessage(job.description + (' completed'), 2000) + finally: + for f in (cf, of, pt): + try: + if os.path.exists(f.name): + os.remove(f.name) + except: + pass #############################View book###################################### diff --git a/src/calibre/library/database.py b/src/calibre/library/database.py index 10f27b1734..ff0c8bb07b 100644 --- a/src/calibre/library/database.py +++ b/src/calibre/library/database.py @@ -976,9 +976,15 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE; return ans[0] def series_index(self, index, index_is_id=False): + ans = None if not index_is_id: - return self.data[index][10] - return self.conn.execute('SELECT series_index FROM books WHERE id=?', (index,)).fetchone()[0] + ans = self.data[index][10] + else: + ans = self.conn.execute('SELECT series_index FROM books WHERE id=?', (index,)).fetchone()[0] + try: + return int(ans) + except: + return 1 def books_in_series(self, series_id): ''' @@ -1229,6 +1235,7 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE; self.conn.commit() def set_series_index(self, id, idx): + idx = int(idx) self.conn.execute('UPDATE books SET series_index=? WHERE id=?', (int(idx), id)) self.conn.commit() row = self.row(id) diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 173108afb8..fae2f73a5c 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -513,6 +513,21 @@ class LibraryDatabase2(LibraryDatabase): p.loadFromData(data) p.save(path) + def formats(self, index, index_is_id=False): + ''' Return available formats as a comma separated list ''' + id = index if index_is_id else self.id(index) + path = os.path.join(self.library_path, self.path(id, index_is_id=True)) + formats = self.conn.execute('SELECT format FROM data WHERE book=?', (id,)).fetchall() + name = self.conn.execute('SELECT name FROM data WHERE book=?', (id,)).fetchone()[0] + formats = map(lambda x:x[0], formats) + ans = [] + for format in formats: + _format = ('.' + format.lower()) if format else '' + if os.access(os.path.join(path, name+_format), os.R_OK|os.W_OK): + ans.append(format) + return ','.join(ans) + + def format(self, index, format, index_is_id=False, as_file=False, mode='r+b'): ''' Return the ebook format as a bytestring or `None` if the format doesn't exist, @@ -529,7 +544,7 @@ class LibraryDatabase2(LibraryDatabase): if os.access(path, os.R_OK|os.W_OK): f = open(path, mode) return f if as_file else f.read() - self.remove_format(id, format, index_is_id=True) + self.remove_format(id, format, index_is_id=True) def add_format(self, index, format, stream, index_is_id=False, path=None): id = index if index_is_id else self.id(index) @@ -571,8 +586,10 @@ class LibraryDatabase2(LibraryDatabase): if name: ext = ('.' + format.lower()) if format else '' path = os.path.join(path, name+ext) - if os.access(path, os.W_OK): + try: os.remove(path) + except: + pass self.conn.execute('DELETE FROM data WHERE book=? AND format=?', (id, format.upper())) self.conn.commit() @@ -664,6 +681,9 @@ class LibraryDatabase2(LibraryDatabase): self.data.set(row, 9, series) def set_series_index(self, id, idx): + if idx is None: + idx = 1 + idx = int(idx) self.conn.execute('UPDATE books SET series_index=? WHERE id=?', (int(idx), id)) self.conn.commit() row = self.row(id) diff --git a/src/calibre/parallel.py b/src/calibre/parallel.py index 498b012e4b..b7f3b7bfc0 100644 --- a/src/calibre/parallel.py +++ b/src/calibre/parallel.py @@ -25,7 +25,7 @@ the worker interrupts the job and dies. The sending of progress and console outp is buffered and asynchronous to prevent the job from being IO bound. ''' import sys, os, gc, cPickle, traceback, atexit, cStringIO, time, signal, \ - subprocess, socket, collections, binascii, re, thread, tempfile + subprocess, socket, collections, binascii, re, thread, tempfile, atexit from select import select from threading import RLock, Thread, Event from math import ceil @@ -855,8 +855,14 @@ def get_func(name): func = getattr(module, func) return func, kwdargs, notification +_atexit = collections.deque() +def myatexit(func, *args, **kwargs): + _atexit.append((func, args, kwargs)) + def work(client_socket, func, args, kwdargs): sys.stdout.last_report = time.time() + orig = atexit.register + atexit.register = myatexit try: func, kargs, notification = get_func(func) if notification is not None and hasattr(sys.stdout, 'notify'): @@ -867,7 +873,18 @@ def work(client_socket, func, args, kwdargs): sys.stdout.send() return res finally: + atexit.register = orig sys.stdout.last_report = None + while True: + try: + func, args, kwargs = _atexit.pop() + except IndexError: + break + try: + func(*args, **kwargs) + except (Exception, SystemExit): + continue + time.sleep(5) # Give any in progress BufferedSend time to complete diff --git a/src/calibre/ptempfile.py b/src/calibre/ptempfile.py index 00c9976bd2..fd28c87f58 100644 --- a/src/calibre/ptempfile.py +++ b/src/calibre/ptempfile.py @@ -9,30 +9,6 @@ import tempfile, os, atexit, shutil from calibre import __version__, __appname__ -class _TemporaryFileWrapper(object): - """ - Temporary file wrapper - - This class provides a wrapper around files opened for - temporary use. In particular, it seeks to automatically - remove the file when the object is deleted. - """ - - def __init__(self, _file, name): - self.file = _file - self.name = name - atexit.register(cleanup, name) - - def __getattr__(self, name): - _file = self.__dict__['file'] - a = getattr(_file, name) - if type(a) != type(0): - setattr(self, name, a) - return a - - def __del__(self): - self.close() - def cleanup(path): try: import os @@ -41,18 +17,36 @@ def cleanup(path): except: pass -def PersistentTemporaryFile(suffix="", prefix="", dir=None): +class PersistentTemporaryFile(object): """ - Return a temporary file that is available even after being closed on + A file-like object that is a temporary file that is available even after being closed on all platforms. It is automatically deleted on normal program termination. - Uses tempfile.mkstemp to create the file. The file is opened in mode 'wb'. """ - if prefix == None: - prefix = "" - fd, name = tempfile.mkstemp(suffix, __appname__+"_"+ __version__+"_" + prefix, - dir=dir) - _file = os.fdopen(fd, 'w+b') - return _TemporaryFileWrapper(_file, name) + _file = None + + def __init__(self, suffix="", prefix="", dir=None, mode='w+b'): + if prefix == None: + prefix = "" + fd, name = tempfile.mkstemp(suffix, __appname__+"_"+ __version__+"_" + prefix, + dir=dir) + self._file = os.fdopen(fd, 'w+b') + self._name = name + atexit.register(cleanup, name) + + def __getattr__(self, name): + if name == 'name': + return self.__dict__['_name'] + return getattr(self.__dict__['_file'], name) + + def __enter__(self): + return self + + def __exit__(self, *args): + self.close() + + def __del__(self): + self.close() + def PersistentTemporaryDirectory(suffix='', prefix='', dir=None): ''' @@ -64,6 +58,9 @@ def PersistentTemporaryDirectory(suffix='', prefix='', dir=None): return tdir class TemporaryDirectory(str): + ''' + A temporary directory to be used ina with statement. + ''' def __init__(self, suffix='', prefix='', dir=None): self.suffix = suffix self.prefix = prefix diff --git a/src/calibre/startup.py b/src/calibre/startup.py index a5d4582013..fc2749323a 100644 --- a/src/calibre/startup.py +++ b/src/calibre/startup.py @@ -13,14 +13,14 @@ from gettext import GNUTranslations import __builtin__ __builtin__.__dict__['_'] = lambda s: s -from calibre.constants import iswindows, isosx, islinux, isfrozen,\ - preferred_encoding -from calibre.translations.msgfmt import make +from calibre.constants import iswindows, preferred_encoding, plugins from calibre.utils.config import prefs +from calibre.translations.msgfmt import make _run_once = False if not _run_once: _run_once = True + ################################################################################ # Setup translations @@ -74,38 +74,6 @@ if not _run_once: except: pass - ################################################################################ - # Load plugins - def load_plugins(): - plugins = {} - if isfrozen: - if iswindows: - plugin_path = os.path.join(os.path.dirname(sys.executable), 'plugins') - sys.path.insert(1, os.path.dirname(sys.executable)) - elif isosx: - plugin_path = os.path.join(getattr(sys, 'frameworks_dir'), 'plugins') - elif islinux: - plugin_path = os.path.join(getattr(sys, 'frozen_path'), 'plugins') - sys.path.insert(0, plugin_path) - else: - import pkg_resources - plugin_path = getattr(pkg_resources, 'resource_filename')('calibre', 'plugins') - sys.path.insert(0, plugin_path) - - for plugin in ['pictureflow', 'lzx', 'msdes'] + \ - (['winutil'] if iswindows else []) + \ - (['usbobserver'] if isosx else []): - try: - p, err = __import__(plugin), '' - except Exception, err: - p = None - err = str(err) - plugins[plugin] = (p, err) - return plugins - - plugins = load_plugins() - - ################################################################################ # Improve builtin path functions to handle unicode sensibly diff --git a/src/calibre/utils/config.py b/src/calibre/utils/config.py index 184aea604a..c20a49bb09 100644 --- a/src/calibre/utils/config.py +++ b/src/calibre/utils/config.py @@ -13,12 +13,11 @@ from optparse import OptionParser as _OptionParser from optparse import IndentedHelpFormatter from PyQt4.QtCore import QString from calibre.constants import terminal_controller, iswindows, isosx, \ - __appname__, __version__, __author__ + __appname__, __version__, __author__, plugins from calibre.utils.lock import LockError, ExclusiveFile from collections import defaultdict if iswindows: - from calibre import plugins config_dir = plugins['winutil'][0].special_folder_path(plugins['winutil'][0].CSIDL_APPDATA) if not os.access(config_dir, os.W_OK|os.X_OK): config_dir = os.path.expanduser('~') diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 530f15b9ab..f606985d07 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +from __future__ import with_statement __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' ''' @@ -313,7 +313,9 @@ class BasicNewsRecipe(object, LoggingInterface): `url_or_raw`: Either a URL or the downloaded index page as a string ''' if re.match(r'\w+://', url_or_raw): - raw = self.browser.open(url_or_raw).read() + f = self.browser.open(url_or_raw) + raw = f.read() + f.close() if not raw: raise RuntimeError('Could not fetch index from %s'%url_or_raw) else: @@ -544,7 +546,10 @@ class BasicNewsRecipe(object, LoggingInterface): if bn: img = os.path.join(imgdir, 'feed_image_%d%s'%(self.image_counter, os.path.splitext(bn))) try: - open(img, 'wb').write(self.browser.open(feed.image_url).read()) + with open(img, 'wb') as fi: + r = self.browser.open(feed.image_url) + fi.write(r.read()) + r.close() self.image_counter += 1 feed.image_url = img self.image_map[feed.image_url] = img @@ -588,12 +593,11 @@ class BasicNewsRecipe(object, LoggingInterface): return self._fetch_article(url, dir, logger, f, a, num_of_feeds) def fetch_embedded_article(self, article, dir, logger, f, a, num_of_feeds): - pt = PersistentTemporaryFile('_feeds2disk.html') templ = templates.EmbeddedContent() raw = templ.generate(article).render('html') - open(pt.name, 'wb').write(raw) - pt.close() - url = ('file:'+pt.name) if iswindows else ('file://'+pt.name) + with PersistentTemporaryFile('_feeds2disk.html') as f: + f.write(raw) + url = ('file:'+f.name) if iswindows else ('file://'+f.name) return self._fetch_article(url, dir, logger, f, a, num_of_feeds) @@ -618,7 +622,8 @@ class BasicNewsRecipe(object, LoggingInterface): index = os.path.join(self.output_dir, 'index.html') html = self.feeds2index(feeds) - open(index, 'wb').write(html) + with open(index, 'wb') as fi: + fi.write(html) self.jobs = [] for f, feed in enumerate(feeds): @@ -670,7 +675,8 @@ class BasicNewsRecipe(object, LoggingInterface): for f, feed in enumerate(feeds): html = self.feed2index(feed) feed_dir = os.path.join(self.output_dir, 'feed_%d'%f) - open(os.path.join(feed_dir, 'index.html'), 'wb').write(html) + with open(os.path.join(feed_dir, 'index.html'), 'wb') as fi: + fi.write(html) self.create_opf(feeds) self.report_progress(1, _('Feeds downloaded to %s')%index) @@ -689,8 +695,10 @@ class BasicNewsRecipe(object, LoggingInterface): ext = ext.lower() if ext else 'jpg' self.report_progress(1, _('Downloading cover from %s')%cu) cpath = os.path.join(self.output_dir, 'cover.'+ext) - cfile = open(cpath, 'wb') - cfile.write(self.browser.open(cu).read()) + with open(cpath, 'wb') as cfile: + r = self.browser.open(cu) + cfile.write(r.read()) + r.close() self.cover_path = cpath @@ -729,7 +737,8 @@ class BasicNewsRecipe(object, LoggingInterface): entries.append(relp.replace(os.sep, '/')) last = sp - src = open(last, 'rb').read().decode('utf-8') + with open(last, 'rb') as fi: + src = fi.read().decode('utf-8') soup = BeautifulSoup(src) body = soup.find('body') if body is not None: @@ -740,7 +749,8 @@ class BasicNewsRecipe(object, LoggingInterface): center=self.center_navbar) elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div') body.insert(len(body.contents), elem) - open(last, 'wb').write(unicode(soup).encode('utf-8')) + with open(last, 'wb') as fi: + fi.write(unicode(soup).encode('utf-8')) if len(feeds) > 1: for i, f in enumerate(feeds): @@ -755,7 +765,9 @@ class BasicNewsRecipe(object, LoggingInterface): opf.create_spine(entries) opf.set_toc(toc) - opf.render(open(opf_path, 'wb'), open(ncx_path, 'wb')) + with open(opf_path, 'wb') as opf_file: + with open(ncx_path, 'wb') as ncx_file: + opf.render(opf_file, ncx_file) def article_downloaded(self, request, result): @@ -800,12 +812,13 @@ class BasicNewsRecipe(object, LoggingInterface): else: title, url = obj self.report_progress(0, _('Fetching feed')+' %s...'%(title if title else url)) - parsed_feeds.append(feed_from_xml(self.browser.open(url).read(), + f = self.browser.open(url) + parsed_feeds.append(feed_from_xml(f.read(), title=title, oldest_article=self.oldest_article, max_articles_per_feed=self.max_articles_per_feed, get_article_url=self.get_article_url)) - + f.close() return parsed_feeds @classmethod @@ -891,7 +904,8 @@ class CustomIndexRecipe(BasicNewsRecipe): mi = OPFCreator(self.output_dir, mi) mi.create_manifest_from_files_in([self.output_dir]) mi.create_spine([os.path.join(self.output_dir, 'index.html')]) - mi.render(open(os.path.join(self.output_dir, 'index.opf'), 'wb')) + with open(os.path.join(self.output_dir, 'index.opf'), 'wb') as opf_file: + mi.render(opf_file) def download(self): index = os.path.abspath(self.custom_index()) diff --git a/src/calibre/web/feeds/recipes/economist.py b/src/calibre/web/feeds/recipes/economist.py index 8794886d21..3c75bd4237 100644 --- a/src/calibre/web/feeds/recipes/economist.py +++ b/src/calibre/web/feeds/recipes/economist.py @@ -33,14 +33,14 @@ class Economist(BasicNewsRecipe): return br def parse_index(self): - soup = BeautifulSoup(self.browser.open(self.INDEX).read(), + soup = BeautifulSoup(self.browser.open(self.INDEX).read(), convertEntities=BeautifulSoup.HTML_ENTITIES) index_started = False feeds = {} ans = [] key = None for tag in soup.findAll(['h1', 'h2']): - text = ''.join(tag.findAll(text=True)) + text = ''.join(tag.findAll(text=True)) if tag.name == 'h1': if 'Classified ads' in text: break diff --git a/src/calibre/web/fetch/simple.py b/src/calibre/web/fetch/simple.py index 749c57bcde..9b8f666c4d 100644 --- a/src/calibre/web/fetch/simple.py +++ b/src/calibre/web/fetch/simple.py @@ -44,11 +44,10 @@ def save_soup(soup, target): if path and os.path.isfile(path) and os.path.exists(path) and os.path.isabs(path): tag[key] = relpath(path, selfdir).replace(os.sep, '/') - f = open(target, 'wb') html = unicode(soup) - f.write(html.encode('utf-8')) - f.close() - + with open(target, 'wb') as f: + f.write(html.encode('utf-8')) + class RecursiveFetcher(object, LoggingInterface): LINK_FILTER = tuple(re.compile(i, re.IGNORECASE) for i in @@ -59,6 +58,7 @@ class RecursiveFetcher(object, LoggingInterface): # ) # ) CSS_IMPORT_PATTERN = re.compile(r'\@import\s+url\((.*?)\)', re.IGNORECASE) + default_timeout = socket.getdefaulttimeout() # Needed here as it is used in __del__ def __init__(self, options, logger, image_map={}, css_map={}, job_info=None): LoggingInterface.__init__(self, logger) @@ -99,7 +99,7 @@ class RecursiveFetcher(object, LoggingInterface): def get_soup(self, src): nmassage = copy.copy(BeautifulSoup.MARKUP_MASSAGE) nmassage.extend(self.preprocess_regexps) - soup = BeautifulSoup(xml_to_unicode(src, self.verbose)[0], markupMassage=nmassage) + soup = BeautifulSoup(xml_to_unicode(src, self.verbose, strip_encoding_pats=True)[0], markupMassage=nmassage) if self.keep_only_tags: body = Tag(soup, 'body') @@ -145,6 +145,8 @@ class RecursiveFetcher(object, LoggingInterface): if getattr(err, 'reason', [0])[0] == 104: # Connection reset by peer self.log_debug('Connection reset by peer retrying in 1 second.') time.sleep(1) + if hasattr(f, 'close'): + f.close() f = self.browser.open(url) else: raise err @@ -196,11 +198,14 @@ class RecursiveFetcher(object, LoggingInterface): except Exception, err: self.log_warning('Could not fetch stylesheet %s', iurl) self.log_debug('Error: %s', str(err), exc_info=True) + if hasattr(f, 'close'): f.close() continue stylepath = os.path.join(diskpath, 'style'+str(c)+'.css') with self.stylemap_lock: self.stylemap[iurl] = stylepath - open(stylepath, 'wb').write(f.read()) + with open(stylepath, 'wb') as x: + x.write(f.read()) + f.close() tag['href'] = stylepath else: for ns in tag.findAll(text=True): @@ -219,12 +224,15 @@ class RecursiveFetcher(object, LoggingInterface): except Exception, err: self.log_warning('Could not fetch stylesheet %s', iurl) self.log_debug('Error: %s', str(err), exc_info=True) + if hasattr(f, 'close'): f.close() continue c += 1 stylepath = os.path.join(diskpath, 'style'+str(c)+'.css') with self.stylemap_lock: self.stylemap[iurl] = stylepath - open(stylepath, 'wb').write(f.read()) + with open(stylepath, 'wb') as x: + x.write(f.read()) + f.close() ns.replaceWith(src.replace(m.group(1), stylepath)) @@ -250,6 +258,7 @@ class RecursiveFetcher(object, LoggingInterface): except Exception, err: self.log_warning('Could not fetch image %s', iurl) self.log_debug('Error: %s', str(err), exc_info=True) + if hasattr(f, 'close'): f.close() continue c += 1 fname = sanitize_file_name('img'+str(c)+ext) @@ -258,7 +267,9 @@ class RecursiveFetcher(object, LoggingInterface): imgpath = os.path.join(diskpath, fname) with self.imagemap_lock: self.imagemap[iurl] = imgpath - open(imgpath, 'wb').write(f.read()) + with open(imgpath, 'wb') as x: + x.write(f.read()) + f.close() tag['src'] = imgpath def absurl(self, baseurl, tag, key, filter=True): @@ -327,6 +338,7 @@ class RecursiveFetcher(object, LoggingInterface): self.current_dir = linkdiskpath f = self.fetch_url(iurl) dsrc = f.read() + f.close() if len(dsrc) == 0 or \ len(re.compile('', re.DOTALL).sub('', dsrc).strip()) == 0: raise ValueError('No content at URL %s'%iurl) @@ -378,7 +390,9 @@ class RecursiveFetcher(object, LoggingInterface): return res def __del__(self): - socket.setdefaulttimeout(self.default_timeout) + dt = getattr(self, 'default_timeout', None) + if dt is not None: + socket.setdefaulttimeout(dt) def option_parser(usage=_('%prog URL\n\nWhere URL is for example http://google.com')): parser = OptionParser(usage=usage)