mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #1135 (EPUB conversion error)
This commit is contained in:
parent
206da41869
commit
a78037096c
@ -13,7 +13,7 @@ from contextlib import nested
|
|||||||
from calibre import extract, walk
|
from calibre import extract, walk
|
||||||
from calibre.ebooks import DRMError
|
from calibre.ebooks import DRMError
|
||||||
from calibre.ebooks.epub import config as common_config
|
from calibre.ebooks.epub import config as common_config
|
||||||
from calibre.ebooks.epub.from_html import convert as html2epub
|
from calibre.ebooks.epub.from_html import convert as html2epub, find_html_index
|
||||||
from calibre.ptempfile import TemporaryDirectory
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
from calibre.ebooks.metadata import MetaInformation
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||||
@ -103,18 +103,7 @@ def unarchive(path, tdir):
|
|||||||
if ext in ['txt', 'rtf'] and os.stat(f).st_size < 2048:
|
if ext in ['txt', 'rtf'] and os.stat(f).st_size < 2048:
|
||||||
continue
|
continue
|
||||||
return f, ext
|
return f, ext
|
||||||
html_pat = re.compile(r'\.(x){0,1}htm(l){0,1}$', re.IGNORECASE)
|
return find_html_index(files)
|
||||||
html_files = [f for f in files if html_pat.search(f) is not None]
|
|
||||||
if not html_files:
|
|
||||||
raise ValueError(_('Could not find an ebook inside the archive'))
|
|
||||||
html_files = [(f, os.stat(f).st_size) for f in html_files]
|
|
||||||
html_files.sort(cmp = lambda x, y: cmp(x[1], y[1]))
|
|
||||||
html_files = [f[0] for f in html_files]
|
|
||||||
for q in ('toc', 'index'):
|
|
||||||
for f in html_files:
|
|
||||||
if os.path.splitext(f)[0].lower() == q:
|
|
||||||
return f, os.path.splitext(f)[1].lower()[1:]
|
|
||||||
return html_files[-1], os.path.splitext(html_files[-1])[1].lower()[1:]
|
|
||||||
|
|
||||||
def any2epub(opts, path, notification=None):
|
def any2epub(opts, path, notification=None):
|
||||||
ext = os.path.splitext(path)[1]
|
ext = os.path.splitext(path)[1]
|
||||||
|
@ -32,7 +32,7 @@ Conversion of HTML/OPF files follows several stages:
|
|||||||
* The EPUB container is created.
|
* The EPUB container is created.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import os, sys, cStringIO, logging
|
import os, sys, cStringIO, logging, re
|
||||||
|
|
||||||
from lxml.etree import XPath
|
from lxml.etree import XPath
|
||||||
try:
|
try:
|
||||||
@ -51,7 +51,25 @@ from calibre.ebooks.epub import initialize_container, PROFILES
|
|||||||
from calibre.ebooks.epub.split import split
|
from calibre.ebooks.epub.split import split
|
||||||
from calibre.ebooks.epub.fonts import Rationalizer
|
from calibre.ebooks.epub.fonts import Rationalizer
|
||||||
from calibre.constants import preferred_encoding
|
from calibre.constants import preferred_encoding
|
||||||
|
from calibre import walk
|
||||||
|
|
||||||
|
def find_html_index(files):
    '''
    Given a list of files, find the most likely root HTML file in the
    list.

    Only paths ending in .htm, .html, .xhtm or .xhtml (case insensitive)
    are considered. Among those, a file whose base name (without directory
    and extension) is ``toc`` is preferred, then one named ``index``; if
    neither exists, the largest HTML file is chosen.

    :param files: An iterable of file paths. Every HTML path in it must
                  exist on disk, since its size is read with os.stat.
    :return: A tuple ``(path, ext)`` where ``ext`` is the lower-cased
             extension of ``path`` without the leading dot.
    :raises ValueError: If ``files`` contains no HTML file.
    '''
    html_pat = re.compile(r'\.(x){0,1}htm(l){0,1}$', re.IGNORECASE)
    html_files = [f for f in files if html_pat.search(f) is not None]
    if not html_files:
        raise ValueError(_('Could not find an ebook inside the archive'))
    # Smallest first, so the largest file ends up last. A key based sort is
    # stable, exactly like the cmp based sort it replaces, and also works
    # on interpreters that have no cmp builtin.
    html_files.sort(key=lambda f: os.stat(f).st_size)
    for q in ('toc', 'index'):
        for f in html_files:
            # Compare only the file name: the entries are usually full
            # paths, and the stem of a full path never equals 'toc'/'index'.
            if os.path.splitext(os.path.basename(f))[0].lower() == q:
                return f, os.path.splitext(f)[1].lower()[1:]
    return html_files[-1], os.path.splitext(html_files[-1])[1].lower()[1:]
|
||||||
|
|
||||||
class HTMLProcessor(Processor, Rationalizer):
|
class HTMLProcessor(Processor, Rationalizer):
|
||||||
|
|
||||||
@ -203,6 +221,10 @@ def convert(htmlfile, opts, notification=None):
|
|||||||
if htmlfile.lower().endswith('.opf'):
|
if htmlfile.lower().endswith('.opf'):
|
||||||
opf = OPF(htmlfile, os.path.dirname(os.path.abspath(htmlfile)))
|
opf = OPF(htmlfile, os.path.dirname(os.path.abspath(htmlfile)))
|
||||||
filelist = opf_traverse(opf, verbose=opts.verbose, encoding=opts.encoding)
|
filelist = opf_traverse(opf, verbose=opts.verbose, encoding=opts.encoding)
|
||||||
|
if not filelist:
|
||||||
|
# Bad OPF look for a HTML file instead
|
||||||
|
htmlfile = find_html_index(walk(os.path.dirname(htmlfile)))[0]
|
||||||
|
filelist = get_filelist(htmlfile, opts)[1]
|
||||||
mi = MetaInformation(opf)
|
mi = MetaInformation(opf)
|
||||||
else:
|
else:
|
||||||
opf, filelist = get_filelist(htmlfile, opts)
|
opf, filelist = get_filelist(htmlfile, opts)
|
||||||
|
@ -304,8 +304,14 @@ def opf_traverse(opf_reader, verbose=0, encoding=None):
|
|||||||
flat[i] = path
|
flat[i] = path
|
||||||
for item in opf_reader.itermanifest():
|
for item in opf_reader.itermanifest():
|
||||||
item.set('href', item.get('href').replace('&', '%26'))
|
item.set('href', item.get('href').replace('&', '%26'))
|
||||||
flat = [HTMLFile(path, 0, encoding, verbose) for path in flat]
|
ans = []
|
||||||
return [f for f in flat if not f.is_binary]
|
for path in flat:
|
||||||
|
if os.path.exists(path):
|
||||||
|
ans.append(HTMLFile(path, 0, encoding, verbose))
|
||||||
|
else:
|
||||||
|
print 'WARNING: OPF spine item %s does not exist'%path
|
||||||
|
ans = [f for f in ans if not f.is_binary]
|
||||||
|
return ans
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user