Handle LIT files with no top-level content

This commit is contained in:
Kovid Goyal 2007-10-26 19:29:14 +00:00
parent 97989a52e6
commit d0af243f96
3 changed files with 40 additions and 21 deletions

View File

@ -1650,7 +1650,6 @@ def process_file(path, options, logger=None):
re.compile(fpba[2], re.IGNORECASE)]
if not hasattr(options, 'anchor_ids'):
options.anchor_ids = True
options.use_spine = options.use_spine and options.toc.toc is not None
files = options.spine if options.use_spine else [path]
conv = HTMLConverter(book, fonts, options, logger, files)
if options.use_spine:
@ -1667,10 +1666,13 @@ def process_file(path, options, logger=None):
return oname
def try_opf(path, options, logger):
try:
opf = glob.glob(os.path.join(os.path.dirname(path),'*.opf'))[0]
except IndexError:
return
if hasattr(options, 'opf'):
opf = options.opf
else:
try:
opf = glob.glob(os.path.join(os.path.dirname(path),'*.opf'))[0]
except IndexError:
return
dirpath = os.path.dirname(os.path.abspath(opf))
opf = OPFReader(open(opf, 'rb'), dirpath)
try:

View File

@ -12,6 +12,7 @@
## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
from libprs500.ebooks.metadata.opf import OPFReader
import os, sys, shutil, glob, logging
from tempfile import mkdtemp
@ -56,30 +57,39 @@ def process_file(path, options, logger=None):
lit = os.path.abspath(os.path.expanduser(path))
tdir = generate_html(lit, logger)
try:
l = glob.glob(os.path.join(tdir, '*toc*.htm*'))
if not l:
l = glob.glob(os.path.join(tdir, '*top*.htm*'))
if not l:
l = glob.glob(os.path.join(tdir, '*contents*.htm*'))
if not l:
l = glob.glob(os.path.join(tdir, '*.htm*'))
opf = glob.glob(os.path.join(tdir, '*.opf'))
if opf:
path = opf[0]
opf = OPFReader(path)
htmlfile = opf.spine.items().next().href
print htmlfile
options.opf = path
else:
l = glob.glob(os.path.join(tdir, '*toc*.htm*'))
if not l:
l = glob.glob(os.path.join(tdir, '*.txt*')) # Some lit file apparently have .txt files in them
l = glob.glob(os.path.join(tdir, '*top*.htm*'))
if not l:
l = glob.glob(os.path.join(tdir, '*contents*.htm*'))
if not l:
l = glob.glob(os.path.join(tdir, '*.htm*'))
if not l:
raise ConversionError('Conversion of lit to html failed. Cannot find html file.')
maxsize, htmlfile = 0, None
for c in l:
sz = os.path.getsize(c)
if sz > maxsize:
maxsize, htmlfile = sz, c
else:
htmlfile = l[0]
l = glob.glob(os.path.join(tdir, '*.txt*')) # Some lit file apparently have .txt files in them
if not l:
raise ConversionError('Conversion of lit to html failed. Cannot find html file.')
maxsize, htmlfile = 0, None
for c in l:
sz = os.path.getsize(c)
if sz > maxsize:
maxsize, htmlfile = sz, c
else:
htmlfile = l[0]
if not options.output:
ext = '.lrs' if options.lrs else '.lrf'
options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
options.output = os.path.abspath(os.path.expanduser(options.output))
options.minimum_indent = 100
options.use_spine = True
html_process_file(htmlfile, options, logger=logger)
finally:
shutil.rmtree(tdir)

View File

@ -95,10 +95,17 @@ class OPFReader(MetaInformation):
ENTITY_PATTERN = re.compile(r'&(\S+);')
def __init__(self, stream, dir=os.getcwd()):
manage = False
if not hasattr(stream, 'read'):
manage = True
dir = os.path.dirname(stream)
stream = open(stream, 'rb')
self.default_title = stream.name if hasattr(stream, 'name') else 'Unknown'
if hasattr(stream, 'seek'):
stream.seek(0)
self.soup = BeautifulStoneSoup(stream.read())
if manage:
stream.close()
self.series = self.series_index = self.rating = None
self.manifest = Manifest(self.soup, dir)
self.spine = Spine(self.soup, self.manifest)