Improve handling of hanging indents when converting from MOBI files. Also condense NCX when generating EPUB if it is larger than the flow size limit. Fixes #1545 (Mobi to ePub conversion problem)

This commit is contained in:
Kovid Goyal 2009-01-03 20:21:49 -08:00
parent 8f2dd9cf84
commit 2e1e37aaf3
2 changed files with 23 additions and 3 deletions

View File

@ -35,7 +35,7 @@ Conversion of HTML/OPF files follows several stages:
import os, sys, cStringIO, logging, re, functools, shutil import os, sys, cStringIO, logging, re, functools, shutil
from lxml.etree import XPath from lxml.etree import XPath
from lxml import html from lxml import html, etree
from PyQt4.Qt import QApplication, QPixmap from PyQt4.Qt import QApplication, QPixmap
from calibre.ebooks.html import Processor, merge_metadata, get_filelist,\ from calibre.ebooks.html import Processor, merge_metadata, get_filelist,\
@ -61,7 +61,7 @@ def remove_bad_link(element, attribute, link, pos):
element.set(attribute, '') element.set(attribute, '')
del element.attrib[attribute] del element.attrib[attribute]
def check(opf_path, pretty_print): def check_links(opf_path, pretty_print):
''' '''
Find and remove all invalid links in the HTML files Find and remove all invalid links in the HTML files
''' '''
@ -284,6 +284,16 @@ def find_oeb_cover(htmlfile):
if match: if match:
return match.group(1) return match.group(1)
def condense_ncx(ncx_path):
    '''
    Shrink the NCX file at `ncx_path` by removing insignificant whitespace.

    The file is parsed, the leading and trailing whitespace of every
    element's text and tail is stripped, and the result is serialized as
    UTF-8 and written back over the original file.

    :param ncx_path: Path to the NCX file. It is rewritten in place.
    '''
    tree = etree.parse(ncx_path)
    root = tree.getroot()
    # Passing the Element factory as the tag filter restricts the walk to
    # real elements (per the lxml documentation), so comments and
    # processing instructions are left untouched.
    for tag in root.iter(tag=etree.Element):
        if tag.text:
            tag.text = tag.text.strip()
        if tag.tail:
            tag.tail = tag.tail.strip()
    compressed = etree.tostring(root, encoding='utf-8')
    # Close the handle explicitly instead of relying on garbage collection:
    # the data must be fully flushed to disk before this function returns,
    # otherwise a size check performed right afterwards could see a
    # truncated file.
    f = open(ncx_path, 'wb')
    try:
        f.write(compressed)
    finally:
        f.close()
def convert(htmlfile, opts, notification=None, create_epub=True, def convert(htmlfile, opts, notification=None, create_epub=True,
oeb_cover=False, extract_to=None): oeb_cover=False, extract_to=None):
htmlfile = os.path.abspath(htmlfile) htmlfile = os.path.abspath(htmlfile)
@ -366,7 +376,8 @@ def convert(htmlfile, opts, notification=None, create_epub=True,
if opts.show_ncx: if opts.show_ncx:
print toc print toc
split(opf_path, opts, stylesheet_map) split(opf_path, opts, stylesheet_map)
check(opf_path, opts.pretty_print) check_links(opf_path, opts.pretty_print)
opf = OPF(opf_path, tdir) opf = OPF(opf_path, tdir)
opf.remove_guide() opf.remove_guide()
oeb_cover_file = None oeb_cover_file = None
@ -387,6 +398,13 @@ def convert(htmlfile, opts, notification=None, create_epub=True,
if not raw.startswith('<?xml '): if not raw.startswith('<?xml '):
raw = '<?xml version="1.0" encoding="UTF-8"?>\n'+raw raw = '<?xml version="1.0" encoding="UTF-8"?>\n'+raw
f.write(raw) f.write(raw)
ncx_path = os.path.join(os.path.dirname(opf_path), 'toc.ncx')
if os.path.exists(ncx_path) and os.stat(ncx_path).st_size > opts.profile.flow_size:
logger.info('Condensing NCX from %d bytes...'%os.stat(ncx_path).st_size)
condense_ncx(ncx_path)
if os.stat(ncx_path).st_size > opts.profile.flow_size:
logger.warn('NCX still larger than allowed size at %d bytes. Menu based Table of Contents may not work on device.'%os.stat(ncx_path).st_size)
if create_epub: if create_epub:
epub = initialize_container(opts.output) epub = initialize_container(opts.output)
epub.add_dir(tdir) epub.add_dir(tdir)

View File

@ -257,6 +257,8 @@ class MobiReader(object):
pass pass
try: try:
styles.append('text-indent: %s' % tag['width']) styles.append('text-indent: %s' % tag['width'])
if tag['width'].startswith('-'):
styles.append('margin-left: %s'%(tag['width'][1:]))
del tag['width'] del tag['width']
except KeyError: except KeyError:
pass pass