Handle LIT files with no top-level content

2025-06-23 15:30:45 -04:00 · 2007-10-26 19:29:14 +00:00 · 2007-10-26 19:29:14 +00:00 · d0af243f96
commit d0af243f96
parent 97989a52e6
3 changed files with 40 additions and 21 deletions
--- a/src/libprs500/ebooks/lrf/html/convert_from.py
+++ b/src/libprs500/ebooks/lrf/html/convert_from.py
@ -1650,7 +1650,6 @@ def process_file(path, options, logger=None):
                                     re.compile(fpba[2], re.IGNORECASE)]
    if not hasattr(options, 'anchor_ids'):
        options.anchor_ids = True
    options.use_spine = options.use_spine and options.toc.toc is not None
    files = options.spine if options.use_spine else [path]
    conv = HTMLConverter(book, fonts, options, logger, files)
    if options.use_spine:
@ -1667,10 +1666,13 @@ def process_file(path, options, logger=None):
    return oname
 def try_opf(path, options, logger):
-    try:
+    if hasattr(options, 'opf'):
-        opf = glob.glob(os.path.join(os.path.dirname(path),'*.opf'))[0]
+        opf = options.opf
-    except IndexError:
+    else:
-        return
+        try:
            opf = glob.glob(os.path.join(os.path.dirname(path),'*.opf'))[0]
        except IndexError:
            return
    dirpath = os.path.dirname(os.path.abspath(opf))
    opf = OPFReader(open(opf, 'rb'), dirpath)    
    try:
--- a/src/libprs500/ebooks/lrf/lit/convert_from.py
+++ b/src/libprs500/ebooks/lrf/lit/convert_from.py
@ -12,6 +12,7 @@
 ##    You should have received a copy of the GNU General Public License along
 ##    with this program; if not, write to the Free Software Foundation, Inc.,
 ##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 from libprs500.ebooks.metadata.opf import OPFReader
 import os, sys, shutil, glob, logging
 from tempfile import mkdtemp
@ -56,30 +57,39 @@ def process_file(path, options, logger=None):
    lit = os.path.abspath(os.path.expanduser(path))
    tdir = generate_html(lit, logger)
    try:
-        l = glob.glob(os.path.join(tdir, '*toc*.htm*'))
+        opf = glob.glob(os.path.join(tdir, '*.opf'))
-        if not l:
+        if opf:
-            l = glob.glob(os.path.join(tdir, '*top*.htm*'))
+            path = opf[0]
-        if not l:
+            opf = OPFReader(path)
-            l = glob.glob(os.path.join(tdir, '*contents*.htm*'))
+            htmlfile = opf.spine.items().next().href
-        if not l:
+            print htmlfile
-            l = glob.glob(os.path.join(tdir, '*.htm*'))
+            options.opf = path
        else:    
            l = glob.glob(os.path.join(tdir, '*toc*.htm*'))
            if not l:
-                l = glob.glob(os.path.join(tdir, '*.txt*')) # Some lit file apparently have .txt files in them
+                l = glob.glob(os.path.join(tdir, '*top*.htm*'))
            if not l:
                l = glob.glob(os.path.join(tdir, '*contents*.htm*'))
            if not l:
                l = glob.glob(os.path.join(tdir, '*.htm*'))
                if not l:
-                    raise ConversionError('Conversion of lit to html failed. Cannot find html file.')
+                    l = glob.glob(os.path.join(tdir, '*.txt*')) # Some lit file apparently have .txt files in them
-            maxsize, htmlfile = 0, None
+                    if not l:
-            for c in l:
+                        raise ConversionError('Conversion of lit to html failed. Cannot find html file.')
-                sz = os.path.getsize(c)
+                maxsize, htmlfile = 0, None
-                if sz > maxsize:
+                for c in l:
-                    maxsize, htmlfile = sz, c
+                    sz = os.path.getsize(c)
-        else:
+                    if sz > maxsize:
-            htmlfile = l[0]
+                        maxsize, htmlfile = sz, c
            else:
                htmlfile = l[0]
        if not options.output:
            ext = '.lrs' if options.lrs else '.lrf'
            options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
        options.output = os.path.abspath(os.path.expanduser(options.output))
        options.minimum_indent = 100
        options.use_spine = True
        html_process_file(htmlfile, options, logger=logger)
    finally:
        shutil.rmtree(tdir)
--- a/src/libprs500/ebooks/metadata/opf.py
+++ b/src/libprs500/ebooks/metadata/opf.py
@ -95,10 +95,17 @@ class OPFReader(MetaInformation):
    ENTITY_PATTERN = re.compile(r'&(\S+);')
    def __init__(self, stream, dir=os.getcwd()):
        manage = False
        if not hasattr(stream, 'read'):
            manage = True
            dir = os.path.dirname(stream)
            stream = open(stream, 'rb')
        self.default_title = stream.name if hasattr(stream, 'name') else 'Unknown' 
        if hasattr(stream, 'seek'):
            stream.seek(0)
        self.soup = BeautifulStoneSoup(stream.read())
        if manage:
            stream.close()
        self.series = self.series_index = self.rating = None
        self.manifest = Manifest(self.soup, dir)
        self.spine = Spine(self.soup, self.manifest)