mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Make parsing of archives for ebooks more robust
This commit is contained in:
parent
c33df41eb2
commit
d2c5ba9de4
@ -2,7 +2,7 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
'''Convert any ebook file into a LRF file.'''
|
'''Convert any ebook file into a LRF file.'''
|
||||||
|
|
||||||
import sys, os, logging, shutil, tempfile, glob
|
import sys, os, logging, shutil, tempfile, glob, re
|
||||||
|
|
||||||
from calibre.ebooks import UnknownFormatError
|
from calibre.ebooks import UnknownFormatError
|
||||||
from calibre.ebooks.lrf import option_parser as _option_parser
|
from calibre.ebooks.lrf import option_parser as _option_parser
|
||||||
@ -26,17 +26,16 @@ def largest_file(files):
|
|||||||
return file
|
return file
|
||||||
|
|
||||||
def find_htmlfile(dir):
|
def find_htmlfile(dir):
|
||||||
cwd = os.getcwd()
|
ext_pat = re.compile(r'\.(x){0,1}htm(l){0,1}', re.IGNORECASE)
|
||||||
try:
|
toc_pat = re.compile(r'toc', re.IGNORECASE)
|
||||||
os.chdir(dir)
|
toc_files, files = [], []
|
||||||
for pair in (('*toc*.htm*', '*toc*.xhtm*'), ('*.htm*', '*.xhtm*')):
|
for f in map(lambda x:os.path.join(dir, x), os.listdir(dir)):
|
||||||
files = glob.glob(pair[0])
|
name, ext = os.path.splitext(f)
|
||||||
files += glob.glob(pair[1])
|
if ext and ext_pat.match(ext):
|
||||||
file = largest_file(files)
|
toc_files.append(f) if toc_pat.search(f) else files.append(f)
|
||||||
if file:
|
a = toc_files if toc_files else files
|
||||||
return os.path.join(dir, file)
|
if a:
|
||||||
finally:
|
return largest_file(a)
|
||||||
os.chdir(cwd)
|
|
||||||
|
|
||||||
def number_of_unhidden_files(base, listing):
|
def number_of_unhidden_files(base, listing):
|
||||||
ans = 0
|
ans = 0
|
||||||
@ -71,9 +70,12 @@ def handle_archive(path):
|
|||||||
files = []
|
files = []
|
||||||
cdir = traverse_subdirs(tdir)
|
cdir = traverse_subdirs(tdir)
|
||||||
file = None
|
file = None
|
||||||
for ext in ('lit', 'rtf', 'fb2','pdf', 'txt', 'epub', 'mobi', 'prc'):
|
exts = ['lit', 'rtf', 'fb2','pdf', 'txt', 'epub', 'mobi', 'prc']
|
||||||
pat = os.path.join(cdir, '*.'+ext)
|
candidates = map(lambda x:os.path.join(cdir, x), os.listdir(cdir))
|
||||||
files.extend(glob.glob(pat))
|
for ext in exts:
|
||||||
|
for f in candidates:
|
||||||
|
if f.lower().endswith(ext):
|
||||||
|
files.append(f)
|
||||||
file = largest_file(files)
|
file = largest_file(files)
|
||||||
if not file:
|
if not file:
|
||||||
file = find_htmlfile(cdir)
|
file = find_htmlfile(cdir)
|
||||||
|
@ -1975,8 +1975,9 @@ def try_opf(path, options, logger):
|
|||||||
except:
|
except:
|
||||||
continue
|
continue
|
||||||
if not getattr(options, 'cover', None) and orig_cover is not None:
|
if not getattr(options, 'cover', None) and orig_cover is not None:
|
||||||
options.cover = orig_cover
|
options.cover = orig_cover
|
||||||
options.spine = [i.path for i in opf.spine if i.path]
|
if getattr(opf, 'spine', False):
|
||||||
|
options.spine = [i.path for i in opf.spine if i.path]
|
||||||
if not getattr(options, 'toc', None):
|
if not getattr(options, 'toc', None):
|
||||||
options.toc = opf.toc
|
options.toc = opf.toc
|
||||||
except Exception:
|
except Exception:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user