IGN:Remove temporary files as soon as possible, rather than only at program exit. Fixes for various minor regressions.

This commit is contained in:
Kovid Goyal 2008-09-11 17:08:48 -07:00
parent 6fee09b9d2
commit a679086e53
13 changed files with 239 additions and 143 deletions

View File

@ -8,7 +8,7 @@ __author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
Various run time constants. Various run time constants.
''' '''
import sys, locale, codecs import sys, locale, codecs, os
from calibre.utils.terminfo import TerminalController from calibre.utils.terminfo import TerminalController
terminal_controller = TerminalController(sys.stdout) terminal_controller = TerminalController(sys.stdout)
@ -28,3 +28,36 @@ win32event = __import__('win32event') if iswindows else None
winerror = __import__('winerror') if iswindows else None winerror = __import__('winerror') if iswindows else None
win32api = __import__('win32api') if iswindows else None win32api = __import__('win32api') if iswindows else None
fcntl = None if iswindows else __import__('fcntl') fcntl = None if iswindows else __import__('fcntl')
################################################################################
plugins = None
if plugins is None:
# Load plugins
def load_plugins():
plugins = {}
if isfrozen:
if iswindows:
plugin_path = os.path.join(os.path.dirname(sys.executable), 'plugins')
sys.path.insert(1, os.path.dirname(sys.executable))
elif isosx:
plugin_path = os.path.join(getattr(sys, 'frameworks_dir'), 'plugins')
elif islinux:
plugin_path = os.path.join(getattr(sys, 'frozen_path'), 'plugins')
sys.path.insert(0, plugin_path)
else:
import pkg_resources
plugin_path = getattr(pkg_resources, 'resource_filename')('calibre', 'plugins')
sys.path.insert(0, plugin_path)
for plugin in ['pictureflow', 'lzx', 'msdes'] + \
(['winutil'] if iswindows else []) + \
(['usbobserver'] if isosx else []):
try:
p, err = __import__(plugin), ''
except Exception, err:
p = None
err = str(err)
plugins[plugin] = (p, err)
return plugins
plugins = load_plugins()

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python from __future__ import with_statement
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
''' '''
@ -8,8 +8,9 @@ from calibre.ebooks.lrf import option_parser as lrf_option_parser
from calibre.ebooks.lrf.html.convert_from import process_file from calibre.ebooks.lrf.html.convert_from import process_file
from calibre.web.feeds.main import option_parser as feeds_option_parser from calibre.web.feeds.main import option_parser as feeds_option_parser
from calibre.web.feeds.main import run_recipe from calibre.web.feeds.main import run_recipe
from calibre.ptempfile import PersistentTemporaryDirectory from calibre.ptempfile import TemporaryDirectory
from calibre import sanitize_file_name, strftime from calibre import sanitize_file_name, strftime
from calibre.ebooks import ConversionError
import sys, os import sys, os
@ -34,25 +35,27 @@ def main(args=sys.argv, notification=None, handler=None):
recipe_arg = args[1] if len(args) > 1 else None recipe_arg = args[1] if len(args) > 1 else None
tdir = PersistentTemporaryDirectory('_feeds2lrf') with TemporaryDirectory('_feeds2lrf') as tdir:
opts.output_dir = tdir opts.output_dir = tdir
recipe = run_recipe(opts, recipe_arg, parser, notification=notification, handler=handler) recipe = run_recipe(opts, recipe_arg, parser, notification=notification, handler=handler)
htmlfile = os.path.join(tdir, 'index.html') htmlfile = os.path.join(tdir, 'index.html')
if not os.access(htmlfile, os.R_OK): if not os.access(htmlfile, os.R_OK):
raise RuntimeError(_('Fetching of recipe failed: ')+recipe_arg) raise RuntimeError(_('Fetching of recipe failed: ')+recipe_arg)
lparser = lrf_option_parser('') lparser = lrf_option_parser('')
ropts = lparser.parse_args(['html2lrf']+recipe.html2lrf_options)[0] ropts = lparser.parse_args(['html2lrf']+recipe.html2lrf_options)[0]
parser.merge_options(ropts, opts) parser.merge_options(ropts, opts)
if not opts.output: if not opts.output:
ext = '.lrs' if opts.lrs else '.lrf' ext = '.lrs' if opts.lrs else '.lrf'
fname = recipe.title + strftime(recipe.timefmt)+ext fname = recipe.title + strftime(recipe.timefmt)+ext
opts.output = os.path.join(os.getcwd(), sanitize_file_name(fname)) opts.output = os.path.join(os.getcwd(), sanitize_file_name(fname))
print 'Generating LRF...' print 'Generating LRF...'
process_file(htmlfile, opts) process_file(htmlfile, opts)
if os.stat(opts.output).st_size < 100: # This can happen if the OS runs out of file handles
raise ConversionError(_('Failed to convert downloaded recipe: ')+recipe_arg)
return 0 return 0
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -15,15 +15,24 @@ from calibre.ebooks.chardet import xml_to_unicode
class MetadataField(object): class MetadataField(object):
def __init__(self, name, is_dc=True): def __init__(self, name, is_dc=True, formatter=None):
self.name = name self.name = name
self.is_dc = is_dc self.is_dc = is_dc
self.formatter = formatter
def __get__(self, obj, type=None): def __get__(self, obj, type=None):
ans = obj.get_metadata_element(self.name) ans = obj.get_metadata_element(self.name)
if ans is None: if ans is None:
return u'' return None
return obj.get_text(ans) ans = obj.get_text(ans)
if ans is None:
return ans
if self.formatter is not None:
try:
ans = self.formatter(ans)
except:
return None
return ans
def __set__(self, obj, val): def __set__(self, obj, val):
elem = obj.get_metadata_element(self.name) elem = obj.get_metadata_element(self.name)
@ -60,8 +69,8 @@ class OPF(object):
comments = MetadataField('description') comments = MetadataField('description')
category = MetadataField('category') category = MetadataField('category')
series = MetadataField('series', is_dc=False) series = MetadataField('series', is_dc=False)
series_index = MetadataField('series_index', is_dc=False) series_index = MetadataField('series_index', is_dc=False, formatter=int)
rating = MetadataField('rating', is_dc=False) rating = MetadataField('rating', is_dc=False, formatter=int)
def __init__(self, stream, basedir): def __init__(self, stream, basedir):
@ -194,12 +203,14 @@ class OPFTest(unittest.TestCase):
self.assertEqual(opf.author_sort, 'Monkey') self.assertEqual(opf.author_sort, 'Monkey')
self.assertEqual(opf.tags, ['One', 'Two']) self.assertEqual(opf.tags, ['One', 'Two'])
self.assertEqual(opf.isbn, '123456789') self.assertEqual(opf.isbn, '123456789')
self.assertEqual(opf.series, None)
self.assertEqual(opf.series_index, None)
def testWriting(self): def testWriting(self):
for test in [('title', 'New & Title'), ('authors', ['One', 'Two']), for test in [('title', 'New & Title'), ('authors', ['One', 'Two']),
('author_sort', "Kitchen"), ('tags', ['Three']), ('author_sort', "Kitchen"), ('tags', ['Three']),
('isbn', 'a'), ('rating', '3')]: ('isbn', 'a'), ('rating', 3)]:
setattr(self.opf, *test) setattr(self.opf, *test)
self.assertEqual(getattr(self.opf, test[0]), test[1]) self.assertEqual(getattr(self.opf, test[0]), test[1])

View File

@ -786,6 +786,10 @@ in which you want to store your books files. Any existing books will be automati
if to_device: if to_device:
self.status_bar.showMessage(_('News fetched. Uploading to device.'), 2000) self.status_bar.showMessage(_('News fetched. Uploading to device.'), 2000)
self.persistent_files.append(pt) self.persistent_files.append(pt)
try:
os.remove(pt.name)
except:
pass
############################################################################ ############################################################################
@ -846,6 +850,7 @@ in which you want to store your books files. Any existing books will be automati
of = PersistentTemporaryFile('.lrf') of = PersistentTemporaryFile('.lrf')
of.close() of.close()
cover = self.library_view.model().db.cover(row) cover = self.library_view.model().db.cover(row)
cf = None
if cover: if cover:
cf = PersistentTemporaryFile('.jpeg') cf = PersistentTemporaryFile('.jpeg')
cf.write(cover) cf.write(cover)
@ -858,7 +863,7 @@ in which you want to store your books files. Any existing books will be automati
description=_('Convert book %d of %d (%s)')%(i+1, len(rows), repr(mi.title))) description=_('Convert book %d of %d (%s)')%(i+1, len(rows), repr(mi.title)))
self.conversion_jobs[job] = (d.cover_file, pt, of, d.output_format, self.conversion_jobs[job] = (cf, pt, of, d.output_format,
self.library_view.model().db.id(row)) self.library_view.model().db.id(row))
res = [] res = []
for row in bad_rows: for row in bad_rows:
@ -882,12 +887,13 @@ in which you want to store your books files. Any existing books will be automati
if mi.title: if mi.title:
options.title = mi.title options.title = mi.title
if mi.authors: if mi.authors:
opts.author = ','.join(mi.authors) options.author = ','.join(mi.authors)
data = None data = None
for fmt in ['cbz', 'cbr']: for fmt in ['cbz', 'cbr']:
try: try:
data = self.library_view.model().db.format(row, fmt.upper()) data = self.library_view.model().db.format(row, fmt.upper())
break if data:
break
except: except:
continue continue
@ -933,7 +939,6 @@ in which you want to store your books files. Any existing books will be automati
'any2lrf', args=[cmdline], 'any2lrf', args=[cmdline],
description=_('Convert book: ')+d.title()) description=_('Convert book: ')+d.title())
self.conversion_jobs[job] = (d.cover_file, pt, of, d.output_format, d.id) self.conversion_jobs[job] = (d.cover_file, pt, of, d.output_format, d.id)
changed = True changed = True
if changed: if changed:
@ -984,14 +989,22 @@ in which you want to store your books files. Any existing books will be automati
self.library_view.model().research() self.library_view.model().research()
def book_converted(self, job): def book_converted(self, job):
of, fmt, book_id = self.conversion_jobs.pop(job)[2:] cf, pt, of, fmt, book_id = self.conversion_jobs.pop(job)
if job.exception is not None: try:
self.job_exception(job) if job.exception is not None:
return self.job_exception(job)
data = open(of.name, 'rb') return
self.library_view.model().db.add_format(book_id, fmt, data, index_is_id=True) data = open(of.name, 'rb')
data.close() self.library_view.model().db.add_format(book_id, fmt, data, index_is_id=True)
self.status_bar.showMessage(job.description + (' completed'), 2000) data.close()
self.status_bar.showMessage(job.description + (' completed'), 2000)
finally:
for f in (cf, of, pt):
try:
if os.path.exists(f.name):
os.remove(f.name)
except:
pass
#############################View book###################################### #############################View book######################################

View File

@ -976,9 +976,15 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
return ans[0] return ans[0]
def series_index(self, index, index_is_id=False): def series_index(self, index, index_is_id=False):
ans = None
if not index_is_id: if not index_is_id:
return self.data[index][10] ans = self.data[index][10]
return self.conn.execute('SELECT series_index FROM books WHERE id=?', (index,)).fetchone()[0] else:
ans = self.conn.execute('SELECT series_index FROM books WHERE id=?', (index,)).fetchone()[0]
try:
return int(ans)
except:
return 1
def books_in_series(self, series_id): def books_in_series(self, series_id):
''' '''
@ -1229,6 +1235,7 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
self.conn.commit() self.conn.commit()
def set_series_index(self, id, idx): def set_series_index(self, id, idx):
idx = int(idx)
self.conn.execute('UPDATE books SET series_index=? WHERE id=?', (int(idx), id)) self.conn.execute('UPDATE books SET series_index=? WHERE id=?', (int(idx), id))
self.conn.commit() self.conn.commit()
row = self.row(id) row = self.row(id)

View File

@ -513,6 +513,21 @@ class LibraryDatabase2(LibraryDatabase):
p.loadFromData(data) p.loadFromData(data)
p.save(path) p.save(path)
def formats(self, index, index_is_id=False):
''' Return available formats as a comma separated list '''
id = index if index_is_id else self.id(index)
path = os.path.join(self.library_path, self.path(id, index_is_id=True))
formats = self.conn.execute('SELECT format FROM data WHERE book=?', (id,)).fetchall()
name = self.conn.execute('SELECT name FROM data WHERE book=?', (id,)).fetchone()[0]
formats = map(lambda x:x[0], formats)
ans = []
for format in formats:
_format = ('.' + format.lower()) if format else ''
if os.access(os.path.join(path, name+_format), os.R_OK|os.W_OK):
ans.append(format)
return ','.join(ans)
def format(self, index, format, index_is_id=False, as_file=False, mode='r+b'): def format(self, index, format, index_is_id=False, as_file=False, mode='r+b'):
''' '''
Return the ebook format as a bytestring or `None` if the format doesn't exist, Return the ebook format as a bytestring or `None` if the format doesn't exist,
@ -529,7 +544,7 @@ class LibraryDatabase2(LibraryDatabase):
if os.access(path, os.R_OK|os.W_OK): if os.access(path, os.R_OK|os.W_OK):
f = open(path, mode) f = open(path, mode)
return f if as_file else f.read() return f if as_file else f.read()
self.remove_format(id, format, index_is_id=True) self.remove_format(id, format, index_is_id=True)
def add_format(self, index, format, stream, index_is_id=False, path=None): def add_format(self, index, format, stream, index_is_id=False, path=None):
id = index if index_is_id else self.id(index) id = index if index_is_id else self.id(index)
@ -571,8 +586,10 @@ class LibraryDatabase2(LibraryDatabase):
if name: if name:
ext = ('.' + format.lower()) if format else '' ext = ('.' + format.lower()) if format else ''
path = os.path.join(path, name+ext) path = os.path.join(path, name+ext)
if os.access(path, os.W_OK): try:
os.remove(path) os.remove(path)
except:
pass
self.conn.execute('DELETE FROM data WHERE book=? AND format=?', (id, format.upper())) self.conn.execute('DELETE FROM data WHERE book=? AND format=?', (id, format.upper()))
self.conn.commit() self.conn.commit()
@ -664,6 +681,9 @@ class LibraryDatabase2(LibraryDatabase):
self.data.set(row, 9, series) self.data.set(row, 9, series)
def set_series_index(self, id, idx): def set_series_index(self, id, idx):
if idx is None:
idx = 1
idx = int(idx)
self.conn.execute('UPDATE books SET series_index=? WHERE id=?', (int(idx), id)) self.conn.execute('UPDATE books SET series_index=? WHERE id=?', (int(idx), id))
self.conn.commit() self.conn.commit()
row = self.row(id) row = self.row(id)

View File

@ -25,7 +25,7 @@ the worker interrupts the job and dies. The sending of progress and console outp
is buffered and asynchronous to prevent the job from being IO bound. is buffered and asynchronous to prevent the job from being IO bound.
''' '''
import sys, os, gc, cPickle, traceback, atexit, cStringIO, time, signal, \ import sys, os, gc, cPickle, traceback, atexit, cStringIO, time, signal, \
subprocess, socket, collections, binascii, re, thread, tempfile subprocess, socket, collections, binascii, re, thread, tempfile, atexit
from select import select from select import select
from threading import RLock, Thread, Event from threading import RLock, Thread, Event
from math import ceil from math import ceil
@ -855,8 +855,14 @@ def get_func(name):
func = getattr(module, func) func = getattr(module, func)
return func, kwdargs, notification return func, kwdargs, notification
_atexit = collections.deque()
def myatexit(func, *args, **kwargs):
_atexit.append((func, args, kwargs))
def work(client_socket, func, args, kwdargs): def work(client_socket, func, args, kwdargs):
sys.stdout.last_report = time.time() sys.stdout.last_report = time.time()
orig = atexit.register
atexit.register = myatexit
try: try:
func, kargs, notification = get_func(func) func, kargs, notification = get_func(func)
if notification is not None and hasattr(sys.stdout, 'notify'): if notification is not None and hasattr(sys.stdout, 'notify'):
@ -867,7 +873,18 @@ def work(client_socket, func, args, kwdargs):
sys.stdout.send() sys.stdout.send()
return res return res
finally: finally:
atexit.register = orig
sys.stdout.last_report = None sys.stdout.last_report = None
while True:
try:
func, args, kwargs = _atexit.pop()
except IndexError:
break
try:
func(*args, **kwargs)
except (Exception, SystemExit):
continue
time.sleep(5) # Give any in progress BufferedSend time to complete time.sleep(5) # Give any in progress BufferedSend time to complete

View File

@ -9,30 +9,6 @@ import tempfile, os, atexit, shutil
from calibre import __version__, __appname__ from calibre import __version__, __appname__
class _TemporaryFileWrapper(object):
"""
Temporary file wrapper
This class provides a wrapper around files opened for
temporary use. In particular, it seeks to automatically
remove the file when the object is deleted.
"""
def __init__(self, _file, name):
self.file = _file
self.name = name
atexit.register(cleanup, name)
def __getattr__(self, name):
_file = self.__dict__['file']
a = getattr(_file, name)
if type(a) != type(0):
setattr(self, name, a)
return a
def __del__(self):
self.close()
def cleanup(path): def cleanup(path):
try: try:
import os import os
@ -41,18 +17,36 @@ def cleanup(path):
except: except:
pass pass
def PersistentTemporaryFile(suffix="", prefix="", dir=None): class PersistentTemporaryFile(object):
""" """
Return a temporary file that is available even after being closed on A file-like object that is a temporary file that is available even after being closed on
all platforms. It is automatically deleted on normal program termination. all platforms. It is automatically deleted on normal program termination.
Uses tempfile.mkstemp to create the file. The file is opened in mode 'wb'.
""" """
if prefix == None: _file = None
prefix = ""
fd, name = tempfile.mkstemp(suffix, __appname__+"_"+ __version__+"_" + prefix, def __init__(self, suffix="", prefix="", dir=None, mode='w+b'):
dir=dir) if prefix == None:
_file = os.fdopen(fd, 'w+b') prefix = ""
return _TemporaryFileWrapper(_file, name) fd, name = tempfile.mkstemp(suffix, __appname__+"_"+ __version__+"_" + prefix,
dir=dir)
self._file = os.fdopen(fd, 'w+b')
self._name = name
atexit.register(cleanup, name)
def __getattr__(self, name):
if name == 'name':
return self.__dict__['_name']
return getattr(self.__dict__['_file'], name)
def __enter__(self):
return self
def __exit__(self, *args):
self.close()
def __del__(self):
self.close()
def PersistentTemporaryDirectory(suffix='', prefix='', dir=None): def PersistentTemporaryDirectory(suffix='', prefix='', dir=None):
''' '''
@ -64,6 +58,9 @@ def PersistentTemporaryDirectory(suffix='', prefix='', dir=None):
return tdir return tdir
class TemporaryDirectory(str): class TemporaryDirectory(str):
'''
A temporary directory to be used in a with statement.
'''
def __init__(self, suffix='', prefix='', dir=None): def __init__(self, suffix='', prefix='', dir=None):
self.suffix = suffix self.suffix = suffix
self.prefix = prefix self.prefix = prefix

View File

@ -13,14 +13,14 @@ from gettext import GNUTranslations
import __builtin__ import __builtin__
__builtin__.__dict__['_'] = lambda s: s __builtin__.__dict__['_'] = lambda s: s
from calibre.constants import iswindows, isosx, islinux, isfrozen,\ from calibre.constants import iswindows, preferred_encoding, plugins
preferred_encoding
from calibre.translations.msgfmt import make
from calibre.utils.config import prefs from calibre.utils.config import prefs
from calibre.translations.msgfmt import make
_run_once = False _run_once = False
if not _run_once: if not _run_once:
_run_once = True _run_once = True
################################################################################ ################################################################################
# Setup translations # Setup translations
@ -74,38 +74,6 @@ if not _run_once:
except: except:
pass pass
################################################################################
# Load plugins
def load_plugins():
plugins = {}
if isfrozen:
if iswindows:
plugin_path = os.path.join(os.path.dirname(sys.executable), 'plugins')
sys.path.insert(1, os.path.dirname(sys.executable))
elif isosx:
plugin_path = os.path.join(getattr(sys, 'frameworks_dir'), 'plugins')
elif islinux:
plugin_path = os.path.join(getattr(sys, 'frozen_path'), 'plugins')
sys.path.insert(0, plugin_path)
else:
import pkg_resources
plugin_path = getattr(pkg_resources, 'resource_filename')('calibre', 'plugins')
sys.path.insert(0, plugin_path)
for plugin in ['pictureflow', 'lzx', 'msdes'] + \
(['winutil'] if iswindows else []) + \
(['usbobserver'] if isosx else []):
try:
p, err = __import__(plugin), ''
except Exception, err:
p = None
err = str(err)
plugins[plugin] = (p, err)
return plugins
plugins = load_plugins()
################################################################################ ################################################################################
# Improve builtin path functions to handle unicode sensibly # Improve builtin path functions to handle unicode sensibly

View File

@ -13,12 +13,11 @@ from optparse import OptionParser as _OptionParser
from optparse import IndentedHelpFormatter from optparse import IndentedHelpFormatter
from PyQt4.QtCore import QString from PyQt4.QtCore import QString
from calibre.constants import terminal_controller, iswindows, isosx, \ from calibre.constants import terminal_controller, iswindows, isosx, \
__appname__, __version__, __author__ __appname__, __version__, __author__, plugins
from calibre.utils.lock import LockError, ExclusiveFile from calibre.utils.lock import LockError, ExclusiveFile
from collections import defaultdict from collections import defaultdict
if iswindows: if iswindows:
from calibre import plugins
config_dir = plugins['winutil'][0].special_folder_path(plugins['winutil'][0].CSIDL_APPDATA) config_dir = plugins['winutil'][0].special_folder_path(plugins['winutil'][0].CSIDL_APPDATA)
if not os.access(config_dir, os.W_OK|os.X_OK): if not os.access(config_dir, os.W_OK|os.X_OK):
config_dir = os.path.expanduser('~') config_dir = os.path.expanduser('~')

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python from __future__ import with_statement
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
''' '''
@ -313,7 +313,9 @@ class BasicNewsRecipe(object, LoggingInterface):
`url_or_raw`: Either a URL or the downloaded index page as a string `url_or_raw`: Either a URL or the downloaded index page as a string
''' '''
if re.match(r'\w+://', url_or_raw): if re.match(r'\w+://', url_or_raw):
raw = self.browser.open(url_or_raw).read() f = self.browser.open(url_or_raw)
raw = f.read()
f.close()
if not raw: if not raw:
raise RuntimeError('Could not fetch index from %s'%url_or_raw) raise RuntimeError('Could not fetch index from %s'%url_or_raw)
else: else:
@ -544,7 +546,10 @@ class BasicNewsRecipe(object, LoggingInterface):
if bn: if bn:
img = os.path.join(imgdir, 'feed_image_%d%s'%(self.image_counter, os.path.splitext(bn))) img = os.path.join(imgdir, 'feed_image_%d%s'%(self.image_counter, os.path.splitext(bn)))
try: try:
open(img, 'wb').write(self.browser.open(feed.image_url).read()) with open(img, 'wb') as fi:
r = self.browser.open(feed.image_url)
fi.write(r.read())
r.close()
self.image_counter += 1 self.image_counter += 1
feed.image_url = img feed.image_url = img
self.image_map[feed.image_url] = img self.image_map[feed.image_url] = img
@ -588,12 +593,11 @@ class BasicNewsRecipe(object, LoggingInterface):
return self._fetch_article(url, dir, logger, f, a, num_of_feeds) return self._fetch_article(url, dir, logger, f, a, num_of_feeds)
def fetch_embedded_article(self, article, dir, logger, f, a, num_of_feeds): def fetch_embedded_article(self, article, dir, logger, f, a, num_of_feeds):
pt = PersistentTemporaryFile('_feeds2disk.html')
templ = templates.EmbeddedContent() templ = templates.EmbeddedContent()
raw = templ.generate(article).render('html') raw = templ.generate(article).render('html')
open(pt.name, 'wb').write(raw) with PersistentTemporaryFile('_feeds2disk.html') as f:
pt.close() f.write(raw)
url = ('file:'+pt.name) if iswindows else ('file://'+pt.name) url = ('file:'+f.name) if iswindows else ('file://'+f.name)
return self._fetch_article(url, dir, logger, f, a, num_of_feeds) return self._fetch_article(url, dir, logger, f, a, num_of_feeds)
@ -618,7 +622,8 @@ class BasicNewsRecipe(object, LoggingInterface):
index = os.path.join(self.output_dir, 'index.html') index = os.path.join(self.output_dir, 'index.html')
html = self.feeds2index(feeds) html = self.feeds2index(feeds)
open(index, 'wb').write(html) with open(index, 'wb') as fi:
fi.write(html)
self.jobs = [] self.jobs = []
for f, feed in enumerate(feeds): for f, feed in enumerate(feeds):
@ -670,7 +675,8 @@ class BasicNewsRecipe(object, LoggingInterface):
for f, feed in enumerate(feeds): for f, feed in enumerate(feeds):
html = self.feed2index(feed) html = self.feed2index(feed)
feed_dir = os.path.join(self.output_dir, 'feed_%d'%f) feed_dir = os.path.join(self.output_dir, 'feed_%d'%f)
open(os.path.join(feed_dir, 'index.html'), 'wb').write(html) with open(os.path.join(feed_dir, 'index.html'), 'wb') as fi:
fi.write(html)
self.create_opf(feeds) self.create_opf(feeds)
self.report_progress(1, _('Feeds downloaded to %s')%index) self.report_progress(1, _('Feeds downloaded to %s')%index)
@ -689,8 +695,10 @@ class BasicNewsRecipe(object, LoggingInterface):
ext = ext.lower() if ext else 'jpg' ext = ext.lower() if ext else 'jpg'
self.report_progress(1, _('Downloading cover from %s')%cu) self.report_progress(1, _('Downloading cover from %s')%cu)
cpath = os.path.join(self.output_dir, 'cover.'+ext) cpath = os.path.join(self.output_dir, 'cover.'+ext)
cfile = open(cpath, 'wb') with open(cpath, 'wb') as cfile:
cfile.write(self.browser.open(cu).read()) r = self.browser.open(cu)
cfile.write(r.read())
r.close()
self.cover_path = cpath self.cover_path = cpath
@ -729,7 +737,8 @@ class BasicNewsRecipe(object, LoggingInterface):
entries.append(relp.replace(os.sep, '/')) entries.append(relp.replace(os.sep, '/'))
last = sp last = sp
src = open(last, 'rb').read().decode('utf-8') with open(last, 'rb') as fi:
src = fi.read().decode('utf-8')
soup = BeautifulSoup(src) soup = BeautifulSoup(src)
body = soup.find('body') body = soup.find('body')
if body is not None: if body is not None:
@ -740,7 +749,8 @@ class BasicNewsRecipe(object, LoggingInterface):
center=self.center_navbar) center=self.center_navbar)
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div') elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
body.insert(len(body.contents), elem) body.insert(len(body.contents), elem)
open(last, 'wb').write(unicode(soup).encode('utf-8')) with open(last, 'wb') as fi:
fi.write(unicode(soup).encode('utf-8'))
if len(feeds) > 1: if len(feeds) > 1:
for i, f in enumerate(feeds): for i, f in enumerate(feeds):
@ -755,7 +765,9 @@ class BasicNewsRecipe(object, LoggingInterface):
opf.create_spine(entries) opf.create_spine(entries)
opf.set_toc(toc) opf.set_toc(toc)
opf.render(open(opf_path, 'wb'), open(ncx_path, 'wb')) with open(opf_path, 'wb') as opf_file:
with open(ncx_path, 'wb') as ncx_file:
opf.render(opf_file, ncx_file)
def article_downloaded(self, request, result): def article_downloaded(self, request, result):
@ -800,12 +812,13 @@ class BasicNewsRecipe(object, LoggingInterface):
else: else:
title, url = obj title, url = obj
self.report_progress(0, _('Fetching feed')+' %s...'%(title if title else url)) self.report_progress(0, _('Fetching feed')+' %s...'%(title if title else url))
parsed_feeds.append(feed_from_xml(self.browser.open(url).read(), f = self.browser.open(url)
parsed_feeds.append(feed_from_xml(f.read(),
title=title, title=title,
oldest_article=self.oldest_article, oldest_article=self.oldest_article,
max_articles_per_feed=self.max_articles_per_feed, max_articles_per_feed=self.max_articles_per_feed,
get_article_url=self.get_article_url)) get_article_url=self.get_article_url))
f.close()
return parsed_feeds return parsed_feeds
@classmethod @classmethod
@ -891,7 +904,8 @@ class CustomIndexRecipe(BasicNewsRecipe):
mi = OPFCreator(self.output_dir, mi) mi = OPFCreator(self.output_dir, mi)
mi.create_manifest_from_files_in([self.output_dir]) mi.create_manifest_from_files_in([self.output_dir])
mi.create_spine([os.path.join(self.output_dir, 'index.html')]) mi.create_spine([os.path.join(self.output_dir, 'index.html')])
mi.render(open(os.path.join(self.output_dir, 'index.opf'), 'wb')) with open(os.path.join(self.output_dir, 'index.opf'), 'wb') as opf_file:
mi.render(opf_file)
def download(self): def download(self):
index = os.path.abspath(self.custom_index()) index = os.path.abspath(self.custom_index())

View File

@ -33,14 +33,14 @@ class Economist(BasicNewsRecipe):
return br return br
def parse_index(self): def parse_index(self):
soup = BeautifulSoup(self.browser.open(self.INDEX).read(), soup = BeautifulSoup(self.browser.open(self.INDEX).read(),
convertEntities=BeautifulSoup.HTML_ENTITIES) convertEntities=BeautifulSoup.HTML_ENTITIES)
index_started = False index_started = False
feeds = {} feeds = {}
ans = [] ans = []
key = None key = None
for tag in soup.findAll(['h1', 'h2']): for tag in soup.findAll(['h1', 'h2']):
text = ''.join(tag.findAll(text=True)) text = ''.join(tag.findAll(text=True))
if tag.name == 'h1': if tag.name == 'h1':
if 'Classified ads' in text: if 'Classified ads' in text:
break break

View File

@ -44,11 +44,10 @@ def save_soup(soup, target):
if path and os.path.isfile(path) and os.path.exists(path) and os.path.isabs(path): if path and os.path.isfile(path) and os.path.exists(path) and os.path.isabs(path):
tag[key] = relpath(path, selfdir).replace(os.sep, '/') tag[key] = relpath(path, selfdir).replace(os.sep, '/')
f = open(target, 'wb')
html = unicode(soup) html = unicode(soup)
f.write(html.encode('utf-8')) with open(target, 'wb') as f:
f.close() f.write(html.encode('utf-8'))
class RecursiveFetcher(object, LoggingInterface): class RecursiveFetcher(object, LoggingInterface):
LINK_FILTER = tuple(re.compile(i, re.IGNORECASE) for i in LINK_FILTER = tuple(re.compile(i, re.IGNORECASE) for i in
@ -59,6 +58,7 @@ class RecursiveFetcher(object, LoggingInterface):
# ) # )
# ) # )
CSS_IMPORT_PATTERN = re.compile(r'\@import\s+url\((.*?)\)', re.IGNORECASE) CSS_IMPORT_PATTERN = re.compile(r'\@import\s+url\((.*?)\)', re.IGNORECASE)
default_timeout = socket.getdefaulttimeout() # Needed here as it is used in __del__
def __init__(self, options, logger, image_map={}, css_map={}, job_info=None): def __init__(self, options, logger, image_map={}, css_map={}, job_info=None):
LoggingInterface.__init__(self, logger) LoggingInterface.__init__(self, logger)
@ -99,7 +99,7 @@ class RecursiveFetcher(object, LoggingInterface):
def get_soup(self, src): def get_soup(self, src):
nmassage = copy.copy(BeautifulSoup.MARKUP_MASSAGE) nmassage = copy.copy(BeautifulSoup.MARKUP_MASSAGE)
nmassage.extend(self.preprocess_regexps) nmassage.extend(self.preprocess_regexps)
soup = BeautifulSoup(xml_to_unicode(src, self.verbose)[0], markupMassage=nmassage) soup = BeautifulSoup(xml_to_unicode(src, self.verbose, strip_encoding_pats=True)[0], markupMassage=nmassage)
if self.keep_only_tags: if self.keep_only_tags:
body = Tag(soup, 'body') body = Tag(soup, 'body')
@ -145,6 +145,8 @@ class RecursiveFetcher(object, LoggingInterface):
if getattr(err, 'reason', [0])[0] == 104: # Connection reset by peer if getattr(err, 'reason', [0])[0] == 104: # Connection reset by peer
self.log_debug('Connection reset by peer retrying in 1 second.') self.log_debug('Connection reset by peer retrying in 1 second.')
time.sleep(1) time.sleep(1)
if hasattr(f, 'close'):
f.close()
f = self.browser.open(url) f = self.browser.open(url)
else: else:
raise err raise err
@ -196,11 +198,14 @@ class RecursiveFetcher(object, LoggingInterface):
except Exception, err: except Exception, err:
self.log_warning('Could not fetch stylesheet %s', iurl) self.log_warning('Could not fetch stylesheet %s', iurl)
self.log_debug('Error: %s', str(err), exc_info=True) self.log_debug('Error: %s', str(err), exc_info=True)
if hasattr(f, 'close'): f.close()
continue continue
stylepath = os.path.join(diskpath, 'style'+str(c)+'.css') stylepath = os.path.join(diskpath, 'style'+str(c)+'.css')
with self.stylemap_lock: with self.stylemap_lock:
self.stylemap[iurl] = stylepath self.stylemap[iurl] = stylepath
open(stylepath, 'wb').write(f.read()) with open(stylepath, 'wb') as x:
x.write(f.read())
f.close()
tag['href'] = stylepath tag['href'] = stylepath
else: else:
for ns in tag.findAll(text=True): for ns in tag.findAll(text=True):
@ -219,12 +224,15 @@ class RecursiveFetcher(object, LoggingInterface):
except Exception, err: except Exception, err:
self.log_warning('Could not fetch stylesheet %s', iurl) self.log_warning('Could not fetch stylesheet %s', iurl)
self.log_debug('Error: %s', str(err), exc_info=True) self.log_debug('Error: %s', str(err), exc_info=True)
if hasattr(f, 'close'): f.close()
continue continue
c += 1 c += 1
stylepath = os.path.join(diskpath, 'style'+str(c)+'.css') stylepath = os.path.join(diskpath, 'style'+str(c)+'.css')
with self.stylemap_lock: with self.stylemap_lock:
self.stylemap[iurl] = stylepath self.stylemap[iurl] = stylepath
open(stylepath, 'wb').write(f.read()) with open(stylepath, 'wb') as x:
x.write(f.read())
f.close()
ns.replaceWith(src.replace(m.group(1), stylepath)) ns.replaceWith(src.replace(m.group(1), stylepath))
@ -250,6 +258,7 @@ class RecursiveFetcher(object, LoggingInterface):
except Exception, err: except Exception, err:
self.log_warning('Could not fetch image %s', iurl) self.log_warning('Could not fetch image %s', iurl)
self.log_debug('Error: %s', str(err), exc_info=True) self.log_debug('Error: %s', str(err), exc_info=True)
if hasattr(f, 'close'): f.close()
continue continue
c += 1 c += 1
fname = sanitize_file_name('img'+str(c)+ext) fname = sanitize_file_name('img'+str(c)+ext)
@ -258,7 +267,9 @@ class RecursiveFetcher(object, LoggingInterface):
imgpath = os.path.join(diskpath, fname) imgpath = os.path.join(diskpath, fname)
with self.imagemap_lock: with self.imagemap_lock:
self.imagemap[iurl] = imgpath self.imagemap[iurl] = imgpath
open(imgpath, 'wb').write(f.read()) with open(imgpath, 'wb') as x:
x.write(f.read())
f.close()
tag['src'] = imgpath tag['src'] = imgpath
def absurl(self, baseurl, tag, key, filter=True): def absurl(self, baseurl, tag, key, filter=True):
@ -327,6 +338,7 @@ class RecursiveFetcher(object, LoggingInterface):
self.current_dir = linkdiskpath self.current_dir = linkdiskpath
f = self.fetch_url(iurl) f = self.fetch_url(iurl)
dsrc = f.read() dsrc = f.read()
f.close()
if len(dsrc) == 0 or \ if len(dsrc) == 0 or \
len(re.compile('<!--.*?-->', re.DOTALL).sub('', dsrc).strip()) == 0: len(re.compile('<!--.*?-->', re.DOTALL).sub('', dsrc).strip()) == 0:
raise ValueError('No content at URL %s'%iurl) raise ValueError('No content at URL %s'%iurl)
@ -378,7 +390,9 @@ class RecursiveFetcher(object, LoggingInterface):
return res return res
def __del__(self): def __del__(self):
socket.setdefaulttimeout(self.default_timeout) dt = getattr(self, 'default_timeout', None)
if dt is not None:
socket.setdefaulttimeout(dt)
def option_parser(usage=_('%prog URL\n\nWhere URL is for example http://google.com')): def option_parser(usage=_('%prog URL\n\nWhere URL is for example http://google.com')):
parser = OptionParser(usage=usage) parser = OptionParser(usage=usage)