Mirror of https://github.com/kovidgoyal/calibre.git
IGN:Remove temporary files as soon as possible, rather than only at program exit. Fixes for various minor regressions.
commit a679086e53
parent 6fee09b9d2
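
The hunks below all serve the same goal: temporary files, directories and network responses are released as soon as their consumers are finished with them, through with blocks and explicit close()/remove() calls, instead of accumulating until atexit handlers run at program exit. A minimal standalone sketch of that pattern follows (illustrative only, written in current Python; the calibre sources in the diff are Python 2 of the period):

    import os, tempfile

    class ScopedTemporaryFile(object):
        """Hypothetical helper: a named temp file deleted when its 'with' block exits."""

        def __init__(self, suffix=''):
            fd, self.name = tempfile.mkstemp(suffix)
            self._file = os.fdopen(fd, 'w+b')

        def write(self, data):
            self._file.write(data)

        def __enter__(self):
            return self

        def __exit__(self, *exc_info):
            self._file.close()
            try:
                os.remove(self.name)   # deleted here, not at interpreter exit
            except OSError:
                pass

    # The file is gone as soon as the block ends.
    with ScopedTemporaryFile('.lrf') as tf:
        tf.write(b'intermediate output')
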
@@ -8,7 +8,7 @@ __author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
 Various run time constants.
 '''
 
-import sys, locale, codecs
+import sys, locale, codecs, os
 from calibre.utils.terminfo import TerminalController
 
 terminal_controller = TerminalController(sys.stdout)
@@ -28,3 +28,36 @@ win32event = __import__('win32event') if iswindows else None
 winerror = __import__('winerror') if iswindows else None
 win32api = __import__('win32api') if iswindows else None
 fcntl = None if iswindows else __import__('fcntl')
+
+################################################################################
+plugins = None
+if plugins is None:
+    # Load plugins
+    def load_plugins():
+        plugins = {}
+        if isfrozen:
+            if iswindows:
+                plugin_path = os.path.join(os.path.dirname(sys.executable), 'plugins')
+                sys.path.insert(1, os.path.dirname(sys.executable))
+            elif isosx:
+                plugin_path = os.path.join(getattr(sys, 'frameworks_dir'), 'plugins')
+            elif islinux:
+                plugin_path = os.path.join(getattr(sys, 'frozen_path'), 'plugins')
+            sys.path.insert(0, plugin_path)
+        else:
+            import pkg_resources
+            plugin_path = getattr(pkg_resources, 'resource_filename')('calibre', 'plugins')
+            sys.path.insert(0, plugin_path)
+
+        for plugin in ['pictureflow', 'lzx', 'msdes'] + \
+                      (['winutil'] if iswindows else []) + \
+                      (['usbobserver'] if isosx else []):
+            try:
+                p, err = __import__(plugin), ''
+            except Exception, err:
+                p = None
+                err = str(err)
+            plugins[plugin] = (p, err)
+        return plugins
+
+    plugins = load_plugins()
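
The plugin loader moved into constants.py above probes a fixed list of optional compiled extensions and records, for each name, either the imported module or the error string. A standalone sketch of the same idea, with placeholder module names rather than calibre's actual plugins:

    import importlib

    def probe_optional_modules(names):
        # Try each optional module; record (module, error_message) so callers can
        # degrade gracefully when an extension is missing.
        found = {}
        for name in names:
            try:
                found[name] = (importlib.import_module(name), '')
            except Exception as err:
                found[name] = (None, str(err))
        return found

    plugins = probe_optional_modules(['json', 'no_such_plugin'])
    for name, (mod, err) in plugins.items():
        print(name, 'OK' if mod is not None else 'missing: ' + err)
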
@@ -1,4 +1,4 @@
 #!/usr/bin/env python
+from __future__ import with_statement
 __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 '''
@@ -8,8 +8,9 @@ from calibre.ebooks.lrf import option_parser as lrf_option_parser
 from calibre.ebooks.lrf.html.convert_from import process_file
 from calibre.web.feeds.main import option_parser as feeds_option_parser
 from calibre.web.feeds.main import run_recipe
-from calibre.ptempfile import PersistentTemporaryDirectory
+from calibre.ptempfile import TemporaryDirectory
 from calibre import sanitize_file_name, strftime
 from calibre.ebooks import ConversionError
 
 import sys, os
 
@@ -34,7 +35,7 @@ def main(args=sys.argv, notification=None, handler=None):
 
     recipe_arg = args[1] if len(args) > 1 else None
 
-    tdir = PersistentTemporaryDirectory('_feeds2lrf')
+    with TemporaryDirectory('_feeds2lrf') as tdir:
         opts.output_dir = tdir
 
         recipe = run_recipe(opts, recipe_arg, parser, notification=notification, handler=handler)
@@ -53,6 +54,8 @@ def main(args=sys.argv, notification=None, handler=None):
         opts.output = os.path.join(os.getcwd(), sanitize_file_name(fname))
         print 'Generating LRF...'
         process_file(htmlfile, opts)
+        if os.stat(opts.output).st_size < 100: # This can happen if the OS runs out of file handles
+            raise ConversionError(_('Failed to convert downloaded recipe: ')+recipe_arg)
         return 0
 
 if __name__ == '__main__':
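
The converter above now scopes its working directory to a with block, so the downloaded feed data disappears as soon as the LRF has been generated rather than at program exit. The standard library offers an equivalent context manager; a minimal sketch (the suffix and file names are illustrative):

    import os, tempfile

    # Everything under tdir is removed as soon as the block exits.
    with tempfile.TemporaryDirectory(suffix='_feeds2lrf') as tdir:
        index = os.path.join(tdir, 'index.html')
        with open(index, 'wb') as f:
            f.write(b'<html></html>')
        print('working in', tdir)

    print('still exists?', os.path.exists(tdir))   # False
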
@@ -15,15 +15,24 @@ from calibre.ebooks.chardet import xml_to_unicode
 
 class MetadataField(object):
 
-    def __init__(self, name, is_dc=True):
+    def __init__(self, name, is_dc=True, formatter=None):
         self.name = name
         self.is_dc = is_dc
+        self.formatter = formatter
 
     def __get__(self, obj, type=None):
         ans = obj.get_metadata_element(self.name)
         if ans is None:
-            return u''
-        return obj.get_text(ans)
+            return None
+        ans = obj.get_text(ans)
+        if ans is None:
+            return ans
+        if self.formatter is not None:
+            try:
+                ans = self.formatter(ans)
+            except:
+                return None
+        return ans
 
     def __set__(self, obj, val):
         elem = obj.get_metadata_element(self.name)
@@ -60,8 +69,8 @@ class OPF(object):
     comments = MetadataField('description')
     category = MetadataField('category')
     series = MetadataField('series', is_dc=False)
-    series_index = MetadataField('series_index', is_dc=False)
-    rating = MetadataField('rating', is_dc=False)
+    series_index = MetadataField('series_index', is_dc=False, formatter=int)
+    rating = MetadataField('rating', is_dc=False, formatter=int)
 
 
     def __init__(self, stream, basedir):
@@ -194,12 +203,14 @@ class OPFTest(unittest.TestCase):
         self.assertEqual(opf.author_sort, 'Monkey')
         self.assertEqual(opf.tags, ['One', 'Two'])
         self.assertEqual(opf.isbn, '123456789')
+        self.assertEqual(opf.series, None)
+        self.assertEqual(opf.series_index, None)
 
 
     def testWriting(self):
         for test in [('title', 'New & Title'), ('authors', ['One', 'Two']),
                      ('author_sort', "Kitchen"), ('tags', ['Three']),
-                     ('isbn', 'a'), ('rating', '3')]:
+                     ('isbn', 'a'), ('rating', 3)]:
             setattr(self.opf, *test)
             self.assertEqual(getattr(self.opf, test[0]), test[1])
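
MetadataField above is a descriptor that now optionally coerces the raw OPF text through a formatter callable such as int, returning None when the value is missing or malformed. A compact standalone sketch of the pattern (the Record class and field names are invented for illustration):

    class TypedField(object):
        # Descriptor that reads a raw string from the instance and optionally coerces it.
        def __init__(self, name, formatter=None):
            self.name = name
            self.formatter = formatter

        def __get__(self, obj, objtype=None):
            raw = obj.raw.get(self.name)
            if raw is None:
                return None
            if self.formatter is not None:
                try:
                    return self.formatter(raw)
                except (TypeError, ValueError):
                    return None    # a malformed value behaves like a missing one
            return raw

    class Record(object):
        rating = TypedField('rating', formatter=int)
        title = TypedField('title')

        def __init__(self, raw):
            self.raw = raw

    r = Record({'rating': '3', 'title': 'A Book'})
    print(r.rating + 1, r.title)               # 4 A Book
    print(Record({'rating': 'junk'}).rating)   # None
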
@@ -786,6 +786,10 @@ in which you want to store your books files. Any existing books will be automati
         if to_device:
             self.status_bar.showMessage(_('News fetched. Uploading to device.'), 2000)
             self.persistent_files.append(pt)
+        try:
+            os.remove(pt.name)
+        except:
+            pass
 
     ############################################################################
 
@@ -846,6 +850,7 @@ in which you want to store your books files. Any existing books will be automati
             of = PersistentTemporaryFile('.lrf')
             of.close()
             cover = self.library_view.model().db.cover(row)
+            cf = None
             if cover:
                 cf = PersistentTemporaryFile('.jpeg')
                 cf.write(cover)
@@ -858,7 +863,7 @@ in which you want to store your books files. Any existing books will be automati
                               description=_('Convert book %d of %d (%s)')%(i+1, len(rows), repr(mi.title)))
 
 
-            self.conversion_jobs[job] = (d.cover_file, pt, of, d.output_format,
+            self.conversion_jobs[job] = (cf, pt, of, d.output_format,
                                          self.library_view.model().db.id(row))
         res = []
         for row in bad_rows:
@@ -882,11 +887,12 @@ in which you want to store your books files. Any existing books will be automati
             if mi.title:
                 options.title = mi.title
             if mi.authors:
-                opts.author = ','.join(mi.authors)
+                options.author = ','.join(mi.authors)
+            data = None
             for fmt in ['cbz', 'cbr']:
                 try:
                     data = self.library_view.model().db.format(row, fmt.upper())
                     if data:
                         break
                 except:
                     continue
@@ -933,7 +939,6 @@ in which you want to store your books files. Any existing books will be automati
                               'any2lrf', args=[cmdline],
                               description=_('Convert book: ')+d.title())
 
-
             self.conversion_jobs[job] = (d.cover_file, pt, of, d.output_format, d.id)
             changed = True
         if changed:
@@ -984,7 +989,8 @@ in which you want to store your books files. Any existing books will be automati
             self.library_view.model().research()
 
     def book_converted(self, job):
-        of, fmt, book_id = self.conversion_jobs.pop(job)[2:]
+        cf, pt, of, fmt, book_id = self.conversion_jobs.pop(job)
+        try:
             if job.exception is not None:
                 self.job_exception(job)
                 return
@@ -992,6 +998,13 @@ in which you want to store your books files. Any existing books will be automati
             self.library_view.model().db.add_format(book_id, fmt, data, index_is_id=True)
             data.close()
             self.status_bar.showMessage(job.description + (' completed'), 2000)
+        finally:
+            for f in (cf, of, pt):
+                try:
+                    if os.path.exists(f.name):
+                        os.remove(f.name)
+                except:
+                    pass
 
     #############################View book######################################
 
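
book_converted above now keeps every temporary file tied to a conversion job and deletes them in a finally block once the job's output has been stored, whether or not that storing succeeded. A standalone sketch of the cleanup pattern (store_result is a stand-in, not calibre code):

    import os, tempfile

    def store_result(path):
        # Stand-in for adding the converted book to the library.
        with open(path, 'rb') as f:
            return len(f.read())

    def run_job():
        temps = [tempfile.NamedTemporaryFile(delete=False) for _ in range(3)]
        for t in temps:
            t.write(b'intermediate data')
            t.close()
        try:
            return store_result(temps[0].name)
        finally:
            # Delete the temporaries as soon as the job finishes,
            # even if storing the result raised.
            for t in temps:
                try:
                    if os.path.exists(t.name):
                        os.remove(t.name)
                except OSError:
                    pass

    print(run_job())
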
@@ -976,9 +976,15 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
         return ans[0]
 
     def series_index(self, index, index_is_id=False):
+        ans = None
         if not index_is_id:
-            return self.data[index][10]
-        return self.conn.execute('SELECT series_index FROM books WHERE id=?', (index,)).fetchone()[0]
+            ans = self.data[index][10]
+        else:
+            ans = self.conn.execute('SELECT series_index FROM books WHERE id=?', (index,)).fetchone()[0]
+        try:
+            return int(ans)
+        except:
+            return 1
 
     def books_in_series(self, series_id):
         '''
@@ -1229,6 +1235,7 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
         self.conn.commit()
 
     def set_series_index(self, id, idx):
+        idx = int(idx)
         self.conn.execute('UPDATE books SET series_index=? WHERE id=?', (int(idx), id))
         self.conn.commit()
         row = self.row(id)
@@ -513,6 +513,21 @@ class LibraryDatabase2(LibraryDatabase):
         p.loadFromData(data)
         p.save(path)
 
+    def formats(self, index, index_is_id=False):
+        ''' Return available formats as a comma separated list '''
+        id = index if index_is_id else self.id(index)
+        path = os.path.join(self.library_path, self.path(id, index_is_id=True))
+        formats = self.conn.execute('SELECT format FROM data WHERE book=?', (id,)).fetchall()
+        name = self.conn.execute('SELECT name FROM data WHERE book=?', (id,)).fetchone()[0]
+        formats = map(lambda x:x[0], formats)
+        ans = []
+        for format in formats:
+            _format = ('.' + format.lower()) if format else ''
+            if os.access(os.path.join(path, name+_format), os.R_OK|os.W_OK):
+                ans.append(format)
+        return ','.join(ans)
+
 
     def format(self, index, format, index_is_id=False, as_file=False, mode='r+b'):
         '''
         Return the ebook format as a bytestring or `None` if the format doesn't exist,
@@ -571,8 +586,10 @@ class LibraryDatabase2(LibraryDatabase):
         if name:
             ext = ('.' + format.lower()) if format else ''
             path = os.path.join(path, name+ext)
-            if os.access(path, os.W_OK):
+            try:
                 os.remove(path)
+            except:
+                pass
         self.conn.execute('DELETE FROM data WHERE book=? AND format=?', (id, format.upper()))
         self.conn.commit()
 
@@ -664,6 +681,9 @@ class LibraryDatabase2(LibraryDatabase):
         self.data.set(row, 9, series)
 
     def set_series_index(self, id, idx):
+        if idx is None:
+            idx = 1
+        idx = int(idx)
         self.conn.execute('UPDATE books SET series_index=? WHERE id=?', (int(idx), id))
         self.conn.commit()
         row = self.row(id)
@@ -25,7 +25,7 @@ the worker interrupts the job and dies. The sending of progress and console outp
 is buffered and asynchronous to prevent the job from being IO bound.
 '''
 import sys, os, gc, cPickle, traceback, atexit, cStringIO, time, signal, \
-       subprocess, socket, collections, binascii, re, thread, tempfile
+       subprocess, socket, collections, binascii, re, thread, tempfile, atexit
 from select import select
 from threading import RLock, Thread, Event
 from math import ceil
@@ -855,8 +855,14 @@ def get_func(name):
     func = getattr(module, func)
     return func, kwdargs, notification
 
+_atexit = collections.deque()
+def myatexit(func, *args, **kwargs):
+    _atexit.append((func, args, kwargs))
+
 def work(client_socket, func, args, kwdargs):
     sys.stdout.last_report = time.time()
+    orig = atexit.register
+    atexit.register = myatexit
     try:
         func, kargs, notification = get_func(func)
         if notification is not None and hasattr(sys.stdout, 'notify'):
@@ -867,7 +873,18 @@ def work(client_socket, func, args, kwdargs):
             sys.stdout.send()
         return res
     finally:
+        atexit.register = orig
         sys.stdout.last_report = None
+        while True:
+            try:
+                func, args, kwargs = _atexit.pop()
+            except IndexError:
+                break
+            try:
+                func(*args, **kwargs)
+            except (Exception, SystemExit):
+                continue
 
         time.sleep(5) # Give any in progress BufferedSend time to complete
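
The worker change above swaps atexit.register for a local queue while a job runs, so that cleanup callbacks registered during the job (for example by PersistentTemporaryFile) are executed as soon as the job finishes instead of piling up in a long-lived worker process. A self-contained sketch of the idea, with a fake job standing in for calibre's work():

    import atexit, collections

    _deferred = collections.deque()

    def _capture(func, *args, **kwargs):
        # Stand-in for atexit.register: queue the callback instead of deferring it to exit.
        _deferred.append((func, args, kwargs))

    def run_job(job):
        orig = atexit.register
        atexit.register = _capture        # capture registrations made while the job runs
        try:
            return job()
        finally:
            atexit.register = orig        # restore the real atexit
            while _deferred:              # run the captured cleanups right now
                func, args, kwargs = _deferred.pop()
                try:
                    func(*args, **kwargs)
                except Exception:
                    pass

    def fake_job():
        atexit.register(print, 'cleaning up a temporary file')
        return 'done'

    print(run_job(fake_job))    # cleanup message prints before 'done'
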
@@ -9,30 +9,6 @@ import tempfile, os, atexit, shutil
 
 from calibre import __version__, __appname__
 
-class _TemporaryFileWrapper(object):
-    """
-    Temporary file wrapper
-
-    This class provides a wrapper around files opened for
-    temporary use. In particular, it seeks to automatically
-    remove the file when the object is deleted.
-    """
-
-    def __init__(self, _file, name):
-        self.file = _file
-        self.name = name
-        atexit.register(cleanup, name)
-
-    def __getattr__(self, name):
-        _file = self.__dict__['file']
-        a = getattr(_file, name)
-        if type(a) != type(0):
-            setattr(self, name, a)
-        return a
-
-    def __del__(self):
-        self.close()
-
 def cleanup(path):
     try:
         import os
@@ -41,18 +17,36 @@ def cleanup(path):
     except:
         pass
 
-def PersistentTemporaryFile(suffix="", prefix="", dir=None):
+class PersistentTemporaryFile(object):
     """
-    Return a temporary file that is available even after being closed on
+    A file-like object that is a temporary file that is available even after being closed on
     all platforms. It is automatically deleted on normal program termination.
     Uses tempfile.mkstemp to create the file. The file is opened in mode 'wb'.
     """
+    _file = None
+
+    def __init__(self, suffix="", prefix="", dir=None, mode='w+b'):
         if prefix == None:
             prefix = ""
         fd, name = tempfile.mkstemp(suffix, __appname__+"_"+ __version__+"_" + prefix,
                                     dir=dir)
-    _file = os.fdopen(fd, 'w+b')
-    return _TemporaryFileWrapper(_file, name)
+        self._file = os.fdopen(fd, 'w+b')
+        self._name = name
+        atexit.register(cleanup, name)
+
+    def __getattr__(self, name):
+        if name == 'name':
+            return self.__dict__['_name']
+        return getattr(self.__dict__['_file'], name)
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *args):
+        self.close()
+
+    def __del__(self):
+        self.close()
+
 
 def PersistentTemporaryDirectory(suffix='', prefix='', dir=None):
     '''
@@ -64,6 +58,9 @@ def PersistentTemporaryDirectory(suffix='', prefix='', dir=None):
     return tdir
 
 class TemporaryDirectory(str):
+    '''
+    A temporary directory to be used ina with statement.
+    '''
     def __init__(self, suffix='', prefix='', dir=None):
         self.suffix = suffix
         self.prefix = prefix
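
PersistentTemporaryFile is now a small class rather than a factory returning a wrapper, so it can act as its own context manager while still registering an atexit cleanup as a last resort. A condensed, runnable sketch of that interface (not an exact copy of calibre's module):

    import atexit, os, tempfile

    def _cleanup(path):
        try:
            if os.path.exists(path):
                os.remove(path)
        except OSError:
            pass

    class PersistentTemporaryFile(object):
        # close() keeps the file on disk; exiting a 'with' block closes it;
        # the atexit hook is only a fallback if the caller never deletes it.
        def __init__(self, suffix=''):
            fd, self._name = tempfile.mkstemp(suffix)
            self._file = os.fdopen(fd, 'w+b')
            atexit.register(_cleanup, self._name)

        def __getattr__(self, name):
            if name == 'name':
                return self.__dict__['_name']
            return getattr(self.__dict__['_file'], name)

        def __enter__(self):
            return self

        def __exit__(self, *args):
            self.close()

    with PersistentTemporaryFile('.lrf') as pt:
        pt.write(b'output')
        path = pt.name

    print(os.path.exists(path))   # True: the file outlives close(); callers remove it when done
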
@@ -13,14 +13,14 @@ from gettext import GNUTranslations
 import __builtin__
 __builtin__.__dict__['_'] = lambda s: s
 
-from calibre.constants import iswindows, isosx, islinux, isfrozen,\
-                              preferred_encoding
-from calibre.translations.msgfmt import make
+from calibre.constants import iswindows, preferred_encoding, plugins
+from calibre.utils.config import prefs
+from calibre.translations.msgfmt import make
 
 _run_once = False
 if not _run_once:
     _run_once = True
 
     ################################################################################
     # Setup translations
 
@@ -74,38 +74,6 @@ if not _run_once:
         except:
             pass
 
-    ################################################################################
-    # Load plugins
-    def load_plugins():
-        plugins = {}
-        if isfrozen:
-            if iswindows:
-                plugin_path = os.path.join(os.path.dirname(sys.executable), 'plugins')
-                sys.path.insert(1, os.path.dirname(sys.executable))
-            elif isosx:
-                plugin_path = os.path.join(getattr(sys, 'frameworks_dir'), 'plugins')
-            elif islinux:
-                plugin_path = os.path.join(getattr(sys, 'frozen_path'), 'plugins')
-            sys.path.insert(0, plugin_path)
-        else:
-            import pkg_resources
-            plugin_path = getattr(pkg_resources, 'resource_filename')('calibre', 'plugins')
-            sys.path.insert(0, plugin_path)
-
-        for plugin in ['pictureflow', 'lzx', 'msdes'] + \
-                      (['winutil'] if iswindows else []) + \
-                      (['usbobserver'] if isosx else []):
-            try:
-                p, err = __import__(plugin), ''
-            except Exception, err:
-                p = None
-                err = str(err)
-            plugins[plugin] = (p, err)
-        return plugins
-
-    plugins = load_plugins()
-
-
     ################################################################################
     # Improve builtin path functions to handle unicode sensibly
 
@@ -13,12 +13,11 @@ from optparse import OptionParser as _OptionParser
 from optparse import IndentedHelpFormatter
 from PyQt4.QtCore import QString
 from calibre.constants import terminal_controller, iswindows, isosx, \
-                              __appname__, __version__, __author__
+                              __appname__, __version__, __author__, plugins
 from calibre.utils.lock import LockError, ExclusiveFile
 from collections import defaultdict
 
 if iswindows:
-    from calibre import plugins
     config_dir = plugins['winutil'][0].special_folder_path(plugins['winutil'][0].CSIDL_APPDATA)
     if not os.access(config_dir, os.W_OK|os.X_OK):
         config_dir = os.path.expanduser('~')
@@ -1,4 +1,4 @@
 #!/usr/bin/env python
+from __future__ import with_statement
 __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 '''
@@ -313,7 +313,9 @@ class BasicNewsRecipe(object, LoggingInterface):
         `url_or_raw`: Either a URL or the downloaded index page as a string
         '''
         if re.match(r'\w+://', url_or_raw):
-            raw = self.browser.open(url_or_raw).read()
+            f = self.browser.open(url_or_raw)
+            raw = f.read()
+            f.close()
             if not raw:
                 raise RuntimeError('Could not fetch index from %s'%url_or_raw)
         else:
@@ -544,7 +546,10 @@ class BasicNewsRecipe(object, LoggingInterface):
             if bn:
                 img = os.path.join(imgdir, 'feed_image_%d%s'%(self.image_counter, os.path.splitext(bn)))
                 try:
-                    open(img, 'wb').write(self.browser.open(feed.image_url).read())
+                    with open(img, 'wb') as fi:
+                        r = self.browser.open(feed.image_url)
+                        fi.write(r.read())
+                        r.close()
                     self.image_counter += 1
                     feed.image_url = img
                     self.image_map[feed.image_url] = img
@@ -588,12 +593,11 @@ class BasicNewsRecipe(object, LoggingInterface):
             return self._fetch_article(url, dir, logger, f, a, num_of_feeds)
 
     def fetch_embedded_article(self, article, dir, logger, f, a, num_of_feeds):
-        pt = PersistentTemporaryFile('_feeds2disk.html')
         templ = templates.EmbeddedContent()
         raw = templ.generate(article).render('html')
-        open(pt.name, 'wb').write(raw)
-        pt.close()
-        url = ('file:'+pt.name) if iswindows else ('file://'+pt.name)
+        with PersistentTemporaryFile('_feeds2disk.html') as f:
+            f.write(raw)
+            url = ('file:'+f.name) if iswindows else ('file://'+f.name)
        return self._fetch_article(url, dir, logger, f, a, num_of_feeds)
 
 
@@ -618,7 +622,8 @@ class BasicNewsRecipe(object, LoggingInterface):
         index = os.path.join(self.output_dir, 'index.html')
 
         html = self.feeds2index(feeds)
-        open(index, 'wb').write(html)
+        with open(index, 'wb') as fi:
+            fi.write(html)
 
         self.jobs = []
         for f, feed in enumerate(feeds):
@@ -670,7 +675,8 @@ class BasicNewsRecipe(object, LoggingInterface):
         for f, feed in enumerate(feeds):
             html = self.feed2index(feed)
             feed_dir = os.path.join(self.output_dir, 'feed_%d'%f)
-            open(os.path.join(feed_dir, 'index.html'), 'wb').write(html)
+            with open(os.path.join(feed_dir, 'index.html'), 'wb') as fi:
+                fi.write(html)
         self.create_opf(feeds)
         self.report_progress(1, _('Feeds downloaded to %s')%index)
 
@@ -689,8 +695,10 @@ class BasicNewsRecipe(object, LoggingInterface):
             ext = ext.lower() if ext else 'jpg'
             self.report_progress(1, _('Downloading cover from %s')%cu)
             cpath = os.path.join(self.output_dir, 'cover.'+ext)
-            cfile = open(cpath, 'wb')
-            cfile.write(self.browser.open(cu).read())
+            with open(cpath, 'wb') as cfile:
+                r = self.browser.open(cu)
+                cfile.write(r.read())
+                r.close()
             self.cover_path = cpath
 
 
@@ -729,7 +737,8 @@ class BasicNewsRecipe(object, LoggingInterface):
                 entries.append(relp.replace(os.sep, '/'))
                 last = sp
 
-        src = open(last, 'rb').read().decode('utf-8')
+        with open(last, 'rb') as fi:
+            src = fi.read().decode('utf-8')
         soup = BeautifulSoup(src)
         body = soup.find('body')
         if body is not None:
@@ -740,7 +749,8 @@ class BasicNewsRecipe(object, LoggingInterface):
                               center=self.center_navbar)
             elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
             body.insert(len(body.contents), elem)
-            open(last, 'wb').write(unicode(soup).encode('utf-8'))
+            with open(last, 'wb') as fi:
+                fi.write(unicode(soup).encode('utf-8'))
 
         if len(feeds) > 1:
             for i, f in enumerate(feeds):
@@ -755,7 +765,9 @@ class BasicNewsRecipe(object, LoggingInterface):
         opf.create_spine(entries)
         opf.set_toc(toc)
 
-        opf.render(open(opf_path, 'wb'), open(ncx_path, 'wb'))
+        with open(opf_path, 'wb') as opf_file:
+            with open(ncx_path, 'wb') as ncx_file:
+                opf.render(opf_file, ncx_file)
 
 
     def article_downloaded(self, request, result):
@@ -800,12 +812,13 @@ class BasicNewsRecipe(object, LoggingInterface):
             else:
                 title, url = obj
             self.report_progress(0, _('Fetching feed')+' %s...'%(title if title else url))
-            parsed_feeds.append(feed_from_xml(self.browser.open(url).read(),
+            f = self.browser.open(url)
+            parsed_feeds.append(feed_from_xml(f.read(),
                                   title=title,
                                   oldest_article=self.oldest_article,
                                   max_articles_per_feed=self.max_articles_per_feed,
                                   get_article_url=self.get_article_url))
-
+            f.close()
         return parsed_feeds
 
     @classmethod
@@ -891,7 +904,8 @@ class CustomIndexRecipe(BasicNewsRecipe):
         mi = OPFCreator(self.output_dir, mi)
         mi.create_manifest_from_files_in([self.output_dir])
         mi.create_spine([os.path.join(self.output_dir, 'index.html')])
-        mi.render(open(os.path.join(self.output_dir, 'index.opf'), 'wb'))
+        with open(os.path.join(self.output_dir, 'index.opf'), 'wb') as opf_file:
+            mi.render(opf_file)
 
     def download(self):
         index = os.path.abspath(self.custom_index())
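
Most of the hunks above replace one-liners such as open(path, 'wb').write(data), which leave the file handle to the garbage collector, with explicit with blocks, and they close browser responses once read. A small illustrative sketch (urllib stands in for the recipe's mechanize browser):

    import contextlib, urllib.request

    def save_url(url, path):
        # Close the HTTP response and the output file deterministically,
        # instead of waiting for garbage collection to release the handles.
        with contextlib.closing(urllib.request.urlopen(url)) as response:
            with open(path, 'wb') as out:
                out.write(response.read())

    # save_url('https://example.com/', 'index.html')
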
@@ -44,10 +44,9 @@ def save_soup(soup, target):
         if path and os.path.isfile(path) and os.path.exists(path) and os.path.isabs(path):
             tag[key] = relpath(path, selfdir).replace(os.sep, '/')
 
-    f = open(target, 'wb')
     html = unicode(soup)
+    with open(target, 'wb') as f:
         f.write(html.encode('utf-8'))
-    f.close()
 
 
 class RecursiveFetcher(object, LoggingInterface):
@@ -59,6 +58,7 @@ class RecursiveFetcher(object, LoggingInterface):
     #               )
     #            )
     CSS_IMPORT_PATTERN = re.compile(r'\@import\s+url\((.*?)\)', re.IGNORECASE)
+    default_timeout = socket.getdefaulttimeout() # Needed here as it is used in __del__
 
     def __init__(self, options, logger, image_map={}, css_map={}, job_info=None):
         LoggingInterface.__init__(self, logger)
@@ -99,7 +99,7 @@ class RecursiveFetcher(object, LoggingInterface):
     def get_soup(self, src):
         nmassage = copy.copy(BeautifulSoup.MARKUP_MASSAGE)
         nmassage.extend(self.preprocess_regexps)
-        soup = BeautifulSoup(xml_to_unicode(src, self.verbose)[0], markupMassage=nmassage)
+        soup = BeautifulSoup(xml_to_unicode(src, self.verbose, strip_encoding_pats=True)[0], markupMassage=nmassage)
 
         if self.keep_only_tags:
             body = Tag(soup, 'body')
@@ -145,6 +145,8 @@ class RecursiveFetcher(object, LoggingInterface):
                 if getattr(err, 'reason', [0])[0] == 104: # Connection reset by peer
                     self.log_debug('Connection reset by peer retrying in 1 second.')
                     time.sleep(1)
+                    if hasattr(f, 'close'):
+                        f.close()
                     f = self.browser.open(url)
                 else:
                     raise err
@@ -196,11 +198,14 @@ class RecursiveFetcher(object, LoggingInterface):
                     except Exception, err:
                         self.log_warning('Could not fetch stylesheet %s', iurl)
                         self.log_debug('Error: %s', str(err), exc_info=True)
+                        if hasattr(f, 'close'): f.close()
                         continue
                     stylepath = os.path.join(diskpath, 'style'+str(c)+'.css')
                     with self.stylemap_lock:
                         self.stylemap[iurl] = stylepath
-                    open(stylepath, 'wb').write(f.read())
+                    with open(stylepath, 'wb') as x:
+                        x.write(f.read())
+                    f.close()
                     tag['href'] = stylepath
                 else:
                     for ns in tag.findAll(text=True):
@@ -219,12 +224,15 @@ class RecursiveFetcher(object, LoggingInterface):
                         except Exception, err:
                             self.log_warning('Could not fetch stylesheet %s', iurl)
                             self.log_debug('Error: %s', str(err), exc_info=True)
+                            if hasattr(f, 'close'): f.close()
                             continue
                         c += 1
                         stylepath = os.path.join(diskpath, 'style'+str(c)+'.css')
                         with self.stylemap_lock:
                             self.stylemap[iurl] = stylepath
-                        open(stylepath, 'wb').write(f.read())
+                        with open(stylepath, 'wb') as x:
+                            x.write(f.read())
+                        f.close()
                         ns.replaceWith(src.replace(m.group(1), stylepath))
 
 
@@ -250,6 +258,7 @@ class RecursiveFetcher(object, LoggingInterface):
             except Exception, err:
                 self.log_warning('Could not fetch image %s', iurl)
                 self.log_debug('Error: %s', str(err), exc_info=True)
+                if hasattr(f, 'close'): f.close()
                 continue
             c += 1
             fname = sanitize_file_name('img'+str(c)+ext)
@@ -258,7 +267,9 @@ class RecursiveFetcher(object, LoggingInterface):
             imgpath = os.path.join(diskpath, fname)
             with self.imagemap_lock:
                 self.imagemap[iurl] = imgpath
-            open(imgpath, 'wb').write(f.read())
+            with open(imgpath, 'wb') as x:
+                x.write(f.read())
+            f.close()
             tag['src'] = imgpath
 
     def absurl(self, baseurl, tag, key, filter=True):
@@ -327,6 +338,7 @@ class RecursiveFetcher(object, LoggingInterface):
                     self.current_dir = linkdiskpath
                     f = self.fetch_url(iurl)
                     dsrc = f.read()
+                    f.close()
                     if len(dsrc) == 0 or \
                        len(re.compile('<!--.*?-->', re.DOTALL).sub('', dsrc).strip()) == 0:
                         raise ValueError('No content at URL %s'%iurl)
@@ -378,7 +390,9 @@ class RecursiveFetcher(object, LoggingInterface):
         return res
 
     def __del__(self):
-        socket.setdefaulttimeout(self.default_timeout)
+        dt = getattr(self, 'default_timeout', None)
+        if dt is not None:
+            socket.setdefaulttimeout(dt)
 
 
 def option_parser(usage=_('%prog URL\n\nWhere URL is for example http://google.com')):
     parser = OptionParser(usage=usage)
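
The __del__ fix above captures the default socket timeout as a class attribute when the class is defined and restores it defensively, so the destructor cannot fail on a half-constructed instance. A tiny sketch of the pattern (the Fetcher class is illustrative only):

    import socket

    socket.setdefaulttimeout(60.0)     # pretend the application set a global default earlier

    class Fetcher(object):
        # Captured at class definition time so __del__ can always restore it.
        default_timeout = socket.getdefaulttimeout()

        def __init__(self, timeout=10.0):
            socket.setdefaulttimeout(timeout)

        def __del__(self):
            # getattr with a default keeps the destructor safe on partially
            # initialised instances and during interpreter shutdown.
            dt = getattr(self, 'default_timeout', None)
            if dt is not None:
                socket.setdefaulttimeout(dt)

    f = Fetcher()
    print(socket.getdefaulttimeout())  # 10.0
    del f
    print(socket.getdefaulttimeout())  # 60.0 again
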