From d2c5ba9de4caf9729b11ecda5bcf393e560b6e88 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 19 Jul 2008 02:09:34 -0700
Subject: [PATCH] Make parsing of archives for ebooks more robust

---
 src/calibre/ebooks/lrf/any/convert_from.py  | 32 +++++++++++----------
 src/calibre/ebooks/lrf/html/convert_from.py |  5 ++--
 2 files changed, 20 insertions(+), 17 deletions(-)
diff --git a/src/calibre/ebooks/lrf/any/convert_from.py b/src/calibre/ebooks/lrf/any/convert_from.py
index c0a1a6500c..246473c2b2 100644
--- a/src/calibre/ebooks/lrf/any/convert_from.py
+++ b/src/calibre/ebooks/lrf/any/convert_from.py
@@ -2,7 +2,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 '''Convert any ebook file into a LRF file.'''
 
-import sys, os, logging, shutil, tempfile, glob
+import sys, os, logging, shutil, tempfile, glob, re
 
 from calibre.ebooks import UnknownFormatError
 from calibre.ebooks.lrf import option_parser as _option_parser
@@ -26,17 +26,16 @@ def largest_file(files):
     return file
 
 def find_htmlfile(dir):
-    cwd = os.getcwd()
-    try:
-        os.chdir(dir)
-        for pair in (('*toc*.htm*', '*toc*.xhtm*'), ('*.htm*', '*.xhtm*')):
-            files = glob.glob(pair[0])
-            files += glob.glob(pair[1])
-            file = largest_file(files)
-            if file:
-                return os.path.join(dir, file)
-    finally:
-        os.chdir(cwd)
+    '''Pick (via largest_file) an HTML file in dir, preferring names containing "toc"; None if none.'''
+    ext_pat = re.compile(r'\.(x){0,1}htm(l){0,1}', re.IGNORECASE)
+    toc_files, files = [], []
+    for name in os.listdir(dir):
+        if ext_pat.match(os.path.splitext(name)[1]):
+            # Look for 'toc' in the bare name only; the directory part must not count.
+            bucket = toc_files if 'toc' in name.lower() else files
+            bucket.append(os.path.join(dir, name))
+    candidates = toc_files or files
+    return largest_file(candidates) if candidates else None
 
 def number_of_unhidden_files(base, listing):
     ans = 0
@@ -71,9 +70,12 @@ def handle_archive(path):
     files = []
     cdir = traverse_subdirs(tdir)
     file = None
-    for ext in ('lit', 'rtf', 'fb2','pdf', 'txt', 'epub', 'mobi', 'prc'):
-        pat = os.path.join(cdir, '*.'+ext)
-        files.extend(glob.glob(pat))
+    # Require the '.' so names that merely end in these letters (e.g. 'split') are ignored.
+    exts = ['lit', 'rtf', 'fb2','pdf', 'txt', 'epub', 'mobi', 'prc']
+    # A real list (not a lazy map) because it is scanned once per extension.
+    candidates = [os.path.join(cdir, x) for x in os.listdir(cdir)]
+    for ext in exts:
+        files.extend(f for f in candidates if f.lower().endswith('.'+ext))
     file = largest_file(files)
     if not file:
         file = find_htmlfile(cdir)
diff --git a/src/calibre/ebooks/lrf/html/convert_from.py b/src/calibre/ebooks/lrf/html/convert_from.py
index 8f46f1daef..e602a61156 100644
--- a/src/calibre/ebooks/lrf/html/convert_from.py
+++ b/src/calibre/ebooks/lrf/html/convert_from.py
@@ -1975,8 +1975,9 @@ def try_opf(path, options, logger):
                                 except:
                                     continue
             if not getattr(options, 'cover', None) and orig_cover is not None:
-                options.cover = orig_cover        
-        options.spine = [i.path for i in opf.spine if i.path]
+                options.cover = orig_cover
+        if getattr(opf, 'spine', False):
+            options.spine = [i.path for i in opf.spine if i.path]
         if not getattr(options, 'toc', None):
             options.toc   = opf.toc
     except Exception: