Make parsing of archives for ebooks more robust

Kovid Goyal 2008-07-19 02:09:34 -07:00
parent c33df41eb2
commit d2c5ba9de4
2 changed files with 20 additions and 17 deletions

View File

@@ -2,7 +2,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 '''Convert any ebook file into a LRF file.'''
-import sys, os, logging, shutil, tempfile, glob
+import sys, os, logging, shutil, tempfile, glob, re
 from calibre.ebooks import UnknownFormatError
 from calibre.ebooks.lrf import option_parser as _option_parser
@@ -26,17 +26,16 @@ def largest_file(files):
     return file

 def find_htmlfile(dir):
-    cwd = os.getcwd()
-    try:
-        os.chdir(dir)
-        for pair in (('*toc*.htm*', '*toc*.xhtm*'), ('*.htm*', '*.xhtm*')):
-            files = glob.glob(pair[0])
-            files += glob.glob(pair[1])
-            file = largest_file(files)
-            if file:
-                return os.path.join(dir, file)
-    finally:
-        os.chdir(cwd)
+    ext_pat = re.compile(r'\.(x){0,1}htm(l){0,1}', re.IGNORECASE)
+    toc_pat = re.compile(r'toc', re.IGNORECASE)
+    toc_files, files = [], []
+    for f in map(lambda x:os.path.join(dir, x), os.listdir(dir)):
+        name, ext = os.path.splitext(f)
+        if ext and ext_pat.match(ext):
+            toc_files.append(f) if toc_pat.search(f) else files.append(f)
+    a = toc_files if toc_files else files
+    if a:
+        return largest_file(a)

 def number_of_unhidden_files(base, listing):
     ans = 0
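
The new find_htmlfile() walks the unpacked directory once, buckets case-insensitive .htm/.html/.xhtm/.xhtml names into TOC-looking files versus plain HTML, and only then hands the preferred bucket to largest_file(). Below is a minimal, self-contained sketch of that selection logic, with largest_file() simplified to "biggest file on disk" (the real helper is defined earlier in this module and may behave differently):

import os
import re

def largest_file(files):
    # Simplified stand-in: pick the biggest file by on-disk size.
    return max(files, key=os.path.getsize) if files else None

def find_htmlfile(dir):
    # Case-insensitive match for .htm/.html/.xhtm/.xhtml extensions.
    ext_pat = re.compile(r'\.x?html?$', re.IGNORECASE)
    toc_pat = re.compile(r'toc', re.IGNORECASE)
    toc_files, files = [], []
    for f in (os.path.join(dir, x) for x in os.listdir(dir)):
        ext = os.path.splitext(f)[1]
        if ext and ext_pat.match(ext):
            # Prefer anything whose name mentions "toc".
            (toc_files if toc_pat.search(f) else files).append(f)
    candidates = toc_files if toc_files else files
    if candidates:
        return largest_file(candidates)

On a case-sensitive filesystem this also picks up names like INDEX.HTML or Toc.Htm, which the old glob patterns ('*toc*.htm*', '*.htm*') would have missed.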
@@ -71,9 +70,12 @@ def handle_archive(path):
     files = []
     cdir = traverse_subdirs(tdir)
     file = None
-    for ext in ('lit', 'rtf', 'fb2','pdf', 'txt', 'epub', 'mobi', 'prc'):
-        pat = os.path.join(cdir, '*.'+ext)
-        files.extend(glob.glob(pat))
+    exts = ['lit', 'rtf', 'fb2','pdf', 'txt', 'epub', 'mobi', 'prc']
+    candidates = map(lambda x:os.path.join(cdir, x), os.listdir(cdir))
+    for ext in exts:
+        for f in candidates:
+            if f.lower().endswith(ext):
+                files.append(f)
     file = largest_file(files)
     if not file:
         file = find_htmlfile(cdir)
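
In handle_archive() the glob patterns ('*.'+ext) are likewise replaced by a case-insensitive extension check over the plain directory listing. A rough sketch of that candidate collection follows; find_ebook_candidates() is a hypothetical wrapper (not a calibre function), the extension list is copied from the hunk above, and the '.'-prefixed endswith() here is slightly stricter than the patch's bare endswith(ext):

import os

# Extension list copied from the diff above.
EBOOK_EXTS = ('lit', 'rtf', 'fb2', 'pdf', 'txt', 'epub', 'mobi', 'prc')

def find_ebook_candidates(cdir):
    # Build the candidate list once; the patch uses map() over os.listdir(),
    # which under Python 2 also yields a reusable list.
    candidates = [os.path.join(cdir, x) for x in os.listdir(cdir)]
    files = []
    for ext in EBOOK_EXTS:
        for f in candidates:
            # Case-insensitive match on the file extension.
            if f.lower().endswith('.' + ext):
                files.append(f)
    return files

On case-sensitive filesystems this catches upper-case names such as BOOK.LIT that the old glob-based loop would have skipped.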

View File

@@ -1975,8 +1975,9 @@ def try_opf(path, options, logger):
             except:
                 continue
         if not getattr(options, 'cover', None) and orig_cover is not None:
             options.cover = orig_cover
-        options.spine = [i.path for i in opf.spine if i.path]
+        if getattr(opf, 'spine', False):
+            options.spine = [i.path for i in opf.spine if i.path]
         if not getattr(options, 'toc', None):
             options.toc = opf.toc
     except Exception:
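
The second file gains a guard so options.spine is only filled in when the parsed OPF actually exposes a non-empty spine, instead of raising AttributeError on a minimal or malformed OPF. A tiny illustration of that pattern using stand-in classes (FakeSpineItem, FakeOPF, BareOPF and Options are placeholders, not calibre's real types):

class FakeSpineItem:
    # Placeholder for an OPF spine entry; only .path matters here.
    def __init__(self, path):
        self.path = path

class FakeOPF:
    # An OPF parsed from a well-formed package: has a spine.
    spine = [FakeSpineItem('chapter1.html'), FakeSpineItem(None)]

class BareOPF:
    # A minimal OPF with no spine attribute at all.
    pass

class Options:
    pass

for opf in (FakeOPF(), BareOPF()):
    options = Options()
    # The guarded assignment added by this commit: skip silently when the
    # OPF has no (or an empty) spine instead of raising AttributeError.
    if getattr(opf, 'spine', False):
        options.spine = [i.path for i in opf.spine if i.path]
    print(getattr(options, 'spine', None))
    # -> ['chapter1.html'] for FakeOPF, None for BareOPF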