Handle lit files with no top level content

2025-12-21 04:25:12 -05:00 · 2007-10-26 19:29:14 +00:00 · 2007-10-26 19:29:14 +00:00 · d0af243f96
commit d0af243f96
parent 97989a52e6
3 changed files with 40 additions and 21 deletions
--- a/src/libprs500/ebooks/lrf/html/convert_from.py
+++ b/src/libprs500/ebooks/lrf/html/convert_from.py
@ -1650,7 +1650,6 @@ def process_file(path, options, logger=None):
                                     re.compile(fpba[2], re.IGNORECASE)]
    if not hasattr(options, 'anchor_ids'):
        options.anchor_ids = True
-    options.use_spine = options.use_spine and options.toc.toc is not None
    files = options.spine if options.use_spine else [path]
    conv = HTMLConverter(book, fonts, options, logger, files)
    if options.use_spine:
@ -1667,10 +1666,13 @@ def process_file(path, options, logger=None):
    return oname
    
 def try_opf(path, options, logger):
-    try:
-        opf = glob.glob(os.path.join(os.path.dirname(path),'*.opf'))[0]
-    except IndexError:
-        return
+    if hasattr(options, 'opf'):
+        opf = options.opf
+    else:
+        try:
+            opf = glob.glob(os.path.join(os.path.dirname(path),'*.opf'))[0]
+        except IndexError:
+            return
    dirpath = os.path.dirname(os.path.abspath(opf))
    opf = OPFReader(open(opf, 'rb'), dirpath)    
    try:
--- a/src/libprs500/ebooks/lrf/lit/convert_from.py
+++ b/src/libprs500/ebooks/lrf/lit/convert_from.py
@ -12,6 +12,7 @@
 ##    You should have received a copy of the GNU General Public License along
 ##    with this program; if not, write to the Free Software Foundation, Inc.,
 ##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+from libprs500.ebooks.metadata.opf import OPFReader

 import os, sys, shutil, glob, logging
 from tempfile import mkdtemp
@ -56,30 +57,39 @@ def process_file(path, options, logger=None):
    lit = os.path.abspath(os.path.expanduser(path))
    tdir = generate_html(lit, logger)
    try:
-        l = glob.glob(os.path.join(tdir, '*toc*.htm*'))
-        if not l:
-            l = glob.glob(os.path.join(tdir, '*top*.htm*'))
-        if not l:
-            l = glob.glob(os.path.join(tdir, '*contents*.htm*'))
-        if not l:
-            l = glob.glob(os.path.join(tdir, '*.htm*'))
-            if not l:
-                l = glob.glob(os.path.join(tdir, '*.txt*')) # Some lit file apparently have .txt files in them
-                if not l:
-                    raise ConversionError('Conversion of lit to html failed. Cannot find html file.')
-            maxsize, htmlfile = 0, None
-            for c in l:
-                sz = os.path.getsize(c)
-                if sz > maxsize:
-                    maxsize, htmlfile = sz, c
+        opf = glob.glob(os.path.join(tdir, '*.opf'))
+        if opf:
+            path = opf[0]
+            opf = OPFReader(path)
+            htmlfile = opf.spine.items().next().href
+            print htmlfile
+            options.opf = path
        else:    
-            htmlfile = l[0]
+            l = glob.glob(os.path.join(tdir, '*toc*.htm*'))
+            if not l:
+                l = glob.glob(os.path.join(tdir, '*top*.htm*'))
+            if not l:
+                l = glob.glob(os.path.join(tdir, '*contents*.htm*'))
+            if not l:
+                l = glob.glob(os.path.join(tdir, '*.htm*'))
+                if not l:
+                    l = glob.glob(os.path.join(tdir, '*.txt*')) # Some lit files apparently have .txt files in them
+                    if not l:
+                        raise ConversionError('Conversion of lit to html failed. Cannot find html file.')
+                maxsize, htmlfile = 0, None
+                for c in l:
+                    sz = os.path.getsize(c)
+                    if sz > maxsize:
+                        maxsize, htmlfile = sz, c
+            else:
+                htmlfile = l[0]
        if not options.output:
            ext = '.lrs' if options.lrs else '.lrf'
            options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
        options.output = os.path.abspath(os.path.expanduser(options.output))
        options.minimum_indent = 100
        options.use_spine = True
+        
        html_process_file(htmlfile, options, logger=logger)
    finally:
        shutil.rmtree(tdir)
--- a/src/libprs500/ebooks/metadata/opf.py
+++ b/src/libprs500/ebooks/metadata/opf.py
@ -95,10 +95,17 @@ class OPFReader(MetaInformation):
    ENTITY_PATTERN = re.compile(r'&(\S+);')
    
    def __init__(self, stream, dir=os.getcwd()):
+        manage = False
+        if not hasattr(stream, 'read'):
+            manage = True
+            dir = os.path.dirname(stream)
+            stream = open(stream, 'rb')
        self.default_title = stream.name if hasattr(stream, 'name') else 'Unknown' 
        if hasattr(stream, 'seek'):
            stream.seek(0)
        self.soup = BeautifulStoneSoup(stream.read())
+        if manage:
+            stream.close()
        self.series = self.series_index = self.rating = None
        self.manifest = Manifest(self.soup, dir)
        self.spine = Spine(self.soup, self.manifest)