Fix various file access bugs.

This commit is contained in:
Kovid Goyal 2007-10-13 17:30:45 +00:00
parent ba7f030a88
commit 2e52d6dfe3

View File

@@ -241,8 +241,6 @@ class HTMLConverter(object):
self.link_level += 1 self.link_level += 1
paths = [link['path'] for link in self.links] paths = [link['path'] for link in self.links]
def is_baen(self, soup): def is_baen(self, soup):
return bool(soup.find('meta', attrs={'name':'Publisher', return bool(soup.find('meta', attrs={'name':'Publisher',
'content':re.compile('Baen', re.IGNORECASE)})) 'content':re.compile('Baen', re.IGNORECASE)}))
@@ -1248,7 +1246,7 @@ class HTMLConverter(object):
path = munge_paths(self.target_prefix, tag['href'])[0] path = munge_paths(self.target_prefix, tag['href'])[0]
ext = os.path.splitext(path)[1] ext = os.path.splitext(path)[1]
if ext: ext = ext[1:].lower() if ext: ext = ext[1:].lower()
if os.access(path, os.R_OK): if os.access(path, os.R_OK) and os.path.isfile(path):
if ext in ['png', 'jpg', 'bmp', 'jpeg']: if ext in ['png', 'jpg', 'bmp', 'jpeg']:
self.process_image(path, tag_css) self.process_image(path, tag_css)
else: else:
@@ -1260,14 +1258,14 @@ class HTMLConverter(object):
if tag.has_key('id') or tag.has_key('name'): if tag.has_key('id') or tag.has_key('name'):
key = 'name' if tag.has_key('name') else 'id' key = 'name' if tag.has_key('name') else 'id'
self.targets[self.target_prefix+tag[key]] = self.current_block self.targets[self.target_prefix+tag[key]] = self.current_block
else: elif not urlparse(tag['href'])[0]:
self.logger.warn('Could not follow link to '+tag['href']) self.logger.warn('Could not follow link to '+tag['href'])
elif tag.has_key('name') or tag.has_key('id'): elif tag.has_key('name') or tag.has_key('id'):
self.process_anchor(tag, tag_css, tag_pseudo_css) self.process_anchor(tag, tag_css, tag_pseudo_css)
elif tagname == 'img': elif tagname == 'img':
if tag.has_key('src'): if tag.has_key('src'):
path = munge_paths(self.target_prefix, tag['src'])[0] path = munge_paths(self.target_prefix, tag['src'])[0]
if os.access(path, os.R_OK): if os.access(path, os.R_OK) and os.path.isfile(path):
width, height = None, None width, height = None, None
try: try:
width = int(tag['width']) width = int(tag['width'])
@@ -1276,7 +1274,7 @@ class HTMLConverter(object):
pass pass
dropcaps = tag.has_key('class') and tag['class'] == 'libprs500_dropcaps' dropcaps = tag.has_key('class') and tag['class'] == 'libprs500_dropcaps'
self.process_image(path, tag_css, width, height, dropcaps=dropcaps) self.process_image(path, tag_css, width, height, dropcaps=dropcaps)
else: elif not urlparse(tag['src'])[0]:
self.logger.warn('Could not find image: '+tag['src']) self.logger.warn('Could not find image: '+tag['src'])
else: else:
self.logger.debug("Failed to process: %s", str(tag)) self.logger.debug("Failed to process: %s", str(tag))
@@ -1532,109 +1530,105 @@ def process_file(path, options, logger=None):
level = logging.DEBUG if options.verbose else logging.INFO level = logging.DEBUG if options.verbose else logging.INFO
logger = logging.getLogger('html2lrf') logger = logging.getLogger('html2lrf')
setup_cli_handlers(logger, level) setup_cli_handlers(logger, level)
cwd = os.getcwd() path = os.path.normpath(os.path.abspath(path))
default_title = filename_to_utf8(os.path.splitext(os.path.basename(path))[0]) default_title = filename_to_utf8(os.path.splitext(os.path.basename(path))[0])
dirpath = os.path.dirname(path) dirpath = os.path.dirname(path)
try:
cpath, tpath = '', ''
try_opf(path, options, logger)
if options.cover:
cpath = os.path.join(dirpath, os.path.basename(options.cover))
if not os.path.exists(cpath):
cpath = os.path.abspath(os.path.expanduser(options.cover))
options.cover = cpath
if os.access(options.cover, os.R_OK):
th = Device.THUMBNAIL_HEIGHT
im = PILImage.open(os.path.join(cwd, cpath))
cim = im.resize((options.profile.screen_width,
options.profile.screen_height - options.profile.fudge),
PILImage.BICUBIC).convert('RGB')
cf = PersistentTemporaryFile(prefix=__appname__+"_", suffix=".jpg")
cf.close()
cim.save(cf.name)
cpath = cf.name
tim = im.resize((int(0.75*th), th), PILImage.ANTIALIAS).convert('RGB') tpath = ''
tf = PersistentTemporaryFile(prefix="html2lrf_", suffix=".jpg") try_opf(path, options, logger)
tf.close() if options.cover:
tim.save(tf.name) options.cover = os.path.expanduser(options.cover)
tpath = tf.name if not os.path.isabs(options.cover):
else: options.cover = os.path.join(dirpath, options.cover)
raise ConversionError, 'Cannot read from: %s'% (options.cover,) if os.access(options.cover, os.R_OK):
th = Device.THUMBNAIL_HEIGHT
im = PILImage.open(options.cover)
cim = im.resize((options.profile.screen_width,
options.profile.screen_height - options.profile.fudge),
PILImage.BICUBIC).convert('RGB')
cf = PersistentTemporaryFile(prefix=__appname__+"_", suffix=".jpg")
cf.close()
cim.save(cf.name)
tim = im.resize((int(0.75*th), th), PILImage.ANTIALIAS).convert('RGB')
tf = PersistentTemporaryFile(prefix="html2lrf_", suffix=".jpg")
tf.close()
tim.save(tf.name)
tpath = tf.name
else:
raise ConversionError, 'Cannot read from: %s'% (options.cover,)
if not options.title: if not options.title:
options.title = default_title options.title = default_title
for prop in ('author', 'author_sort', 'title', 'title_sort', 'publisher', 'freetext'): for prop in ('author', 'author_sort', 'title', 'title_sort', 'publisher', 'freetext'):
val = getattr(options, prop) val = getattr(options, prop)
if val and not isinstance(val, unicode): if val and not isinstance(val, unicode):
soup = BeautifulSoup(val) soup = BeautifulSoup(val)
setattr(options, prop, unicode(soup)) setattr(options, prop, unicode(soup))
title = (options.title, options.title_sort) title = (options.title, options.title_sort)
author = (options.author, options.author_sort) author = (options.author, options.author_sort)
args = dict(font_delta=options.font_delta, title=title, \ args = dict(font_delta=options.font_delta, title=title, \
author=author, sourceencoding='utf8',\ author=author, sourceencoding='utf8',\
freetext=options.freetext, category=options.category, freetext=options.freetext, category=options.category,
publisher=options.publisher, publisher=options.publisher,
booksetting=BookSetting(dpi=10*options.profile.dpi, booksetting=BookSetting(dpi=10*options.profile.dpi,
screenheight=options.profile.screen_height, screenheight=options.profile.screen_height,
screenwidth=options.profile.screen_width)) screenwidth=options.profile.screen_width))
if tpath: if tpath:
args['thumbnail'] = tpath args['thumbnail'] = tpath
header = None header = None
if options.header: if options.header:
header = Paragraph() header = Paragraph()
fheader = options.headerformat fheader = options.headerformat
fheader = re.sub(r'([^%]|^)%t', r'\1' + options.title, fheader) fheader = re.sub(r'([^%]|^)%t', r'\1' + options.title, fheader)
fheader = re.sub(r'([^%]|^)%a', r'\1' + options.author, fheader) fheader = re.sub(r'([^%]|^)%a', r'\1' + options.author, fheader)
fheader = re.sub(r'%%a','%a',fheader) fheader = re.sub(r'%%a','%a',fheader)
fheader = re.sub(r'%%t','%t',fheader) fheader = re.sub(r'%%t','%t',fheader)
header.append(fheader + " ") header.append(fheader + " ")
book, fonts = Book(options, logger, header=header, **args) book, fonts = Book(options, logger, header=header, **args)
le = re.compile(options.link_exclude) if options.link_exclude else \ le = re.compile(options.link_exclude) if options.link_exclude else \
re.compile('$') re.compile('$')
pb = re.compile(options.page_break, re.IGNORECASE) if options.page_break else \ pb = re.compile(options.page_break, re.IGNORECASE) if options.page_break else \
re.compile('$') re.compile('$')
fpb = re.compile(options.force_page_break, re.IGNORECASE) if options.force_page_break else \ fpb = re.compile(options.force_page_break, re.IGNORECASE) if options.force_page_break else \
re.compile('$') re.compile('$')
options.cover = cpath options.force_page_break = fpb
options.force_page_break = fpb options.link_exclude = le
options.link_exclude = le options.page_break = pb
options.page_break = pb options.chapter_regex = re.compile(options.chapter_regex, re.IGNORECASE)
options.chapter_regex = re.compile(options.chapter_regex, re.IGNORECASE) fpba = options.force_page_break_attr.split(',')
fpba = options.force_page_break_attr.split(',') if len(fpba) != 3:
if len(fpba) != 3: fpba = ['$', '', '$']
fpba = ['$', '', '$'] options.force_page_break_attr = [re.compile(fpba[0], re.IGNORECASE), fpba[1],
options.force_page_break_attr = [re.compile(fpba[0], re.IGNORECASE), fpba[1], re.compile(fpba[2], re.IGNORECASE)]
re.compile(fpba[2], re.IGNORECASE)] if not hasattr(options, 'anchor_ids'):
if not hasattr(options, 'anchor_ids'): options.anchor_ids = True
options.anchor_ids = True files = options.spine if options.use_spine else [path]
files = options.spine if options.use_spine else [path] conv = HTMLConverter(book, fonts, options, logger, files)
conv = HTMLConverter(book, fonts, options, logger, files) if options.use_spine:
if options.use_spine: conv.create_toc(options.toc)
conv.create_toc(options.toc) oname = options.output
oname = options.output if not oname:
if not oname: suffix = '.lrs' if options.lrs else '.lrf'
suffix = '.lrs' if options.lrs else '.lrf' name = os.path.splitext(os.path.basename(path))[0] + suffix
name = os.path.splitext(os.path.basename(path))[0] + suffix oname = os.path.join(os.getcwd(), name)
oname = os.path.join(cwd,name) oname = os.path.abspath(os.path.expanduser(oname))
oname = os.path.abspath(os.path.expanduser(oname)) conv.writeto(oname, lrs=options.lrs)
conv.writeto(oname, lrs=options.lrs) logger.info('Output written to %s', oname)
logger.info('Output written to %s', oname) conv.cleanup()
conv.cleanup() return oname
return oname
finally:
os.chdir(cwd)
def try_opf(path, options, logger): def try_opf(path, options, logger):
try: try:
opf = glob.glob(os.path.join(os.path.dirname(path),'*.opf'))[0] opf = glob.glob(os.path.join(os.path.dirname(path),'*.opf'))[0]
except IndexError: except IndexError:
return return
opf = OPFReader(open(opf, 'rb'), os.path.dirname(os.path.abspath(opf))) dirpath = os.path.dirname(os.path.abspath(opf))
opf = OPFReader(open(opf, 'rb'), dirpath)
try: try:
title = opf.title title = opf.title
if title and not options.title: if title and not options.title:
@@ -1655,7 +1649,8 @@ def try_opf(path, options, logger):
if not options.cover: if not options.cover:
cover = opf.cover cover = opf.cover
if cover: if cover:
cover = os.path.join(os.path.dirname(path), cover) if not os.path.isabs(cover):
cover = os.path.join(dirpath, cover)
if os.access(cover, os.R_OK): if os.access(cover, os.R_OK):
try: try:
PILImage.open(cover) PILImage.open(cover)
@@ -1674,7 +1669,7 @@ def try_opf(path, options, logger):
break break
except: except:
continue continue
options.spine = [i.href for i in opf.spine.items()] options.spine = [i.href for i in opf.spine.items()]
options.toc = opf.toc options.toc = opf.toc
except Exception: except Exception:
logger.exception('Failed to process opf file') logger.exception('Failed to process opf file')