mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Added LIT input plugin. Ported splitting code now works (at least on the handful of files I've tested)
This commit is contained in:
parent
b9f80aa229
commit
3e29dfbe56
@ -280,6 +280,7 @@ from calibre.ebooks.epub.input import EPUBInput
|
|||||||
from calibre.ebooks.mobi.input import MOBIInput
|
from calibre.ebooks.mobi.input import MOBIInput
|
||||||
from calibre.ebooks.pdf.input import PDFInput
|
from calibre.ebooks.pdf.input import PDFInput
|
||||||
from calibre.ebooks.txt.input import TXTInput
|
from calibre.ebooks.txt.input import TXTInput
|
||||||
|
from calibre.ebooks.lit.input import LITInput
|
||||||
from calibre.ebooks.html.input import HTMLInput
|
from calibre.ebooks.html.input import HTMLInput
|
||||||
from calibre.ebooks.oeb.output import OEBOutput
|
from calibre.ebooks.oeb.output import OEBOutput
|
||||||
from calibre.ebooks.txt.output import TXTOutput
|
from calibre.ebooks.txt.output import TXTOutput
|
||||||
@ -287,7 +288,7 @@ from calibre.ebooks.pdf.output import PDFOutput
|
|||||||
from calibre.customize.profiles import input_profiles, output_profiles
|
from calibre.customize.profiles import input_profiles, output_profiles
|
||||||
|
|
||||||
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDFInput, HTMLInput,
|
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDFInput, HTMLInput,
|
||||||
TXTInput, OEBOutput, TXTOutput, PDFOutput]
|
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput]
|
||||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||||
x.__name__.endswith('MetadataReader')]
|
x.__name__.endswith('MetadataReader')]
|
||||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||||
|
@ -41,6 +41,11 @@ class ConversionOption(object):
|
|||||||
def __eq__(self, other):
|
def __eq__(self, other):
|
||||||
return hash(self) == hash(other)
|
return hash(self) == hash(other)
|
||||||
|
|
||||||
|
def clone(self):
|
||||||
|
return ConversionOption(name=self.name, help=self.help,
|
||||||
|
long_switch=self.long_switch, short_switch=self.short_switch,
|
||||||
|
choices=self.choices)
|
||||||
|
|
||||||
class OptionRecommendation(object):
|
class OptionRecommendation(object):
|
||||||
LOW = 1
|
LOW = 1
|
||||||
MED = 2
|
MED = 2
|
||||||
@ -59,6 +64,10 @@ class OptionRecommendation(object):
|
|||||||
|
|
||||||
self.validate_parameters()
|
self.validate_parameters()
|
||||||
|
|
||||||
|
def clone(self):
|
||||||
|
return OptionRecommendation(recommended_value=self.recommended_value,
|
||||||
|
level=self.level, option=self.option.clone())
|
||||||
|
|
||||||
def validate_parameters(self):
|
def validate_parameters(self):
|
||||||
if self.option.choices and self.recommended_value not in \
|
if self.option.choices and self.recommended_value not in \
|
||||||
self.option.choices:
|
self.option.choices:
|
||||||
@ -170,8 +179,14 @@ class InputFormatPlugin(Plugin):
|
|||||||
options.debug_input = os.path.abspath(options.debug_input)
|
options.debug_input = os.path.abspath(options.debug_input)
|
||||||
if not os.path.exists(options.debug_input):
|
if not os.path.exists(options.debug_input):
|
||||||
os.makedirs(options.debug_input)
|
os.makedirs(options.debug_input)
|
||||||
|
if isinstance(ret, basestring):
|
||||||
shutil.rmtree(options.debug_input)
|
shutil.rmtree(options.debug_input)
|
||||||
shutil.copytree(output_dir, options.debug_input)
|
shutil.copytree(output_dir, options.debug_input)
|
||||||
|
else:
|
||||||
|
from calibre.ebooks.oeb.writer import OEBWriter
|
||||||
|
w = OEBWriter(pretty_print=options.pretty_print)
|
||||||
|
w(ret, options.debug_input)
|
||||||
|
|
||||||
log.info('Input debug saved to:', options.debug_input)
|
log.info('Input debug saved to:', options.debug_input)
|
||||||
|
|
||||||
return ret
|
return ret
|
||||||
|
@ -57,7 +57,7 @@ def check_command_line_options(parser, args, log):
|
|||||||
raise SystemExit(1)
|
raise SystemExit(1)
|
||||||
|
|
||||||
output = args[2]
|
output = args[2]
|
||||||
if output.startswith('.'):
|
if output.startswith('.') and output != '.':
|
||||||
output = os.path.splitext(os.path.basename(input))[0]+output
|
output = os.path.splitext(os.path.basename(input))[0]+output
|
||||||
output = os.path.abspath(output)
|
output = os.path.abspath(output)
|
||||||
|
|
||||||
@ -171,6 +171,7 @@ def main(args=sys.argv):
|
|||||||
|
|
||||||
plumber.run()
|
plumber.run()
|
||||||
|
|
||||||
|
if plumber.opts.debug_input is None:
|
||||||
log(_('Output saved to'), ' ', plumber.output)
|
log(_('Output saved to'), ' ', plumber.output)
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
@ -32,8 +32,8 @@ class Plumber(object):
|
|||||||
:param input: Path to input file.
|
:param input: Path to input file.
|
||||||
:param output: Path to output file/directory
|
:param output: Path to output file/directory
|
||||||
'''
|
'''
|
||||||
self.input = input
|
self.input = os.path.abspath(input)
|
||||||
self.output = output
|
self.output = os.path.abspath(output)
|
||||||
self.log = log
|
self.log = log
|
||||||
|
|
||||||
# Initialize the conversion options that are independent of input and
|
# Initialize the conversion options that are independent of input and
|
||||||
@ -188,15 +188,15 @@ OptionRecommendation(name='language',
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
input_fmt = os.path.splitext(input)[1]
|
input_fmt = os.path.splitext(self.input)[1]
|
||||||
if not input_fmt:
|
if not input_fmt:
|
||||||
raise ValueError('Input file must have an extension')
|
raise ValueError('Input file must have an extension')
|
||||||
input_fmt = input_fmt[1:].lower()
|
input_fmt = input_fmt[1:].lower()
|
||||||
|
|
||||||
if os.path.exists(output) and os.path.isdir(output):
|
if os.path.exists(self.output) and os.path.isdir(self.output):
|
||||||
output_fmt = 'oeb'
|
output_fmt = 'oeb'
|
||||||
else:
|
else:
|
||||||
output_fmt = os.path.splitext(output)[1]
|
output_fmt = os.path.splitext(self.output)[1]
|
||||||
if not output_fmt:
|
if not output_fmt:
|
||||||
output_fmt = '.oeb'
|
output_fmt = '.oeb'
|
||||||
output_fmt = output_fmt[1:].lower()
|
output_fmt = output_fmt[1:].lower()
|
||||||
@ -323,6 +323,9 @@ OptionRecommendation(name='language',
|
|||||||
self.oeb = self.input_plugin(open(self.input, 'rb'), self.opts,
|
self.oeb = self.input_plugin(open(self.input, 'rb'), self.opts,
|
||||||
self.input_fmt, self.log,
|
self.input_fmt, self.log,
|
||||||
accelerators, tdir)
|
accelerators, tdir)
|
||||||
|
if self.opts.debug_input is not None:
|
||||||
|
self.log('Debug input called, aborting the rest of the pipeline.')
|
||||||
|
return
|
||||||
if not hasattr(self.oeb, 'manifest'):
|
if not hasattr(self.oeb, 'manifest'):
|
||||||
self.oeb = create_oebbook(self.log, self.oeb, self.opts)
|
self.oeb = create_oebbook(self.log, self.oeb, self.opts)
|
||||||
|
|
||||||
@ -365,18 +368,20 @@ OptionRecommendation(name='language',
|
|||||||
self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
|
self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
|
||||||
self.opts, self.log)
|
self.opts, self.log)
|
||||||
|
|
||||||
def create_oebbook(log, opfpath, opts):
|
def create_oebbook(log, path_or_stream, opts, reader=None):
|
||||||
'''
|
'''
|
||||||
Create an OEBBook from an OPF file.
|
Create an OEBBook.
|
||||||
'''
|
'''
|
||||||
from calibre.ebooks.oeb.reader import OEBReader
|
|
||||||
from calibre.ebooks.oeb.base import OEBBook
|
from calibre.ebooks.oeb.base import OEBBook
|
||||||
html_preprocessor = HTMLPreProcessor()
|
html_preprocessor = HTMLPreProcessor()
|
||||||
reader = OEBReader()
|
|
||||||
oeb = OEBBook(log, html_preprocessor=html_preprocessor,
|
oeb = OEBBook(log, html_preprocessor=html_preprocessor,
|
||||||
pretty_print=opts.pretty_print)
|
pretty_print=opts.pretty_print)
|
||||||
# Read OEB Book into OEBBook
|
# Read OEB Book into OEBBook
|
||||||
log.info('Parsing all content...')
|
log('Parsing all content...')
|
||||||
reader(oeb, opfpath)
|
if reader is None:
|
||||||
|
from calibre.ebooks.oeb.reader import OEBReader
|
||||||
|
reader = OEBReader
|
||||||
|
|
||||||
|
reader()(oeb, path_or_stream)
|
||||||
return oeb
|
return oeb
|
||||||
|
|
||||||
|
@ -252,6 +252,14 @@ class HTMLInput(InputFormatPlugin):
|
|||||||
)
|
)
|
||||||
),
|
),
|
||||||
|
|
||||||
|
OptionRecommendation(name='dont_package',
|
||||||
|
recommended_value=False, level=OptionRecommendation.LOW,
|
||||||
|
help=_('Normally this input plugin re-arranges all the input '
|
||||||
|
'files into a standard folder hierarchy. Only use this option '
|
||||||
|
'if you know what you are doing as it can result in various '
|
||||||
|
'nasty side effects in the rest of of the conversion pipeline.'
|
||||||
|
)
|
||||||
|
),
|
||||||
])
|
])
|
||||||
|
|
||||||
def convert(self, stream, opts, file_ext, log,
|
def convert(self, stream, opts, file_ext, log,
|
||||||
@ -276,6 +284,9 @@ class HTMLInput(InputFormatPlugin):
|
|||||||
mi.render(open('metadata.opf', 'wb'))
|
mi.render(open('metadata.opf', 'wb'))
|
||||||
opfpath = os.path.abspath('metadata.opf')
|
opfpath = os.path.abspath('metadata.opf')
|
||||||
|
|
||||||
|
if opts.dont_package:
|
||||||
|
return opfpath
|
||||||
|
|
||||||
from calibre.ebooks.conversion.plumber import create_oebbook
|
from calibre.ebooks.conversion.plumber import create_oebbook
|
||||||
oeb = create_oebbook(log, opfpath, opts)
|
oeb = create_oebbook(log, opfpath, opts)
|
||||||
|
|
||||||
|
24
src/calibre/ebooks/lit/input.py
Normal file
24
src/calibre/ebooks/lit/input.py
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
from __future__ import with_statement
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
from calibre.customize.conversion import InputFormatPlugin
|
||||||
|
|
||||||
|
class LITInput(InputFormatPlugin):
|
||||||
|
|
||||||
|
name = 'LIT Input'
|
||||||
|
author = 'Marshall T. Vandegrift'
|
||||||
|
description = 'Convert LIT files to HTML'
|
||||||
|
file_types = set(['lit'])
|
||||||
|
|
||||||
|
def convert(self, stream, options, file_ext, log,
|
||||||
|
accelerators):
|
||||||
|
from calibre.ebooks.lit.reader import LitReader
|
||||||
|
from calibre.ebooks.conversion.plumber import create_oebbook
|
||||||
|
return create_oebbook(log, stream, options, reader=LitReader)
|
||||||
|
|
||||||
|
|
@ -7,13 +7,12 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> ' \
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> ' \
|
||||||
'and Marshall T. Vandegrift <llasram@gmail.com>'
|
'and Marshall T. Vandegrift <llasram@gmail.com>'
|
||||||
|
|
||||||
import sys, struct, os
|
import struct, os
|
||||||
import functools
|
import functools
|
||||||
import re
|
import re
|
||||||
from urlparse import urldefrag
|
from urlparse import urldefrag
|
||||||
from cStringIO import StringIO
|
from cStringIO import StringIO
|
||||||
from urllib import unquote as urlunquote
|
from urllib import unquote as urlunquote
|
||||||
from lxml import etree
|
|
||||||
from calibre.ebooks.lit import LitError
|
from calibre.ebooks.lit import LitError
|
||||||
from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
|
from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
|
||||||
import calibre.ebooks.lit.mssha1 as mssha1
|
import calibre.ebooks.lit.mssha1 as mssha1
|
||||||
|
@ -272,11 +272,7 @@ def XPath(expr):
|
|||||||
def xpath(elem, expr):
|
def xpath(elem, expr):
|
||||||
return elem.xpath(expr, namespaces=XPNSMAP)
|
return elem.xpath(expr, namespaces=XPNSMAP)
|
||||||
|
|
||||||
def _prepare_xml_for_serialization(root):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def xml2str(root, pretty_print=False, strip_comments=False):
|
def xml2str(root, pretty_print=False, strip_comments=False):
|
||||||
_prepare_xml_for_serialization(root)
|
|
||||||
ans = etree.tostring(root, encoding='utf-8', xml_declaration=True,
|
ans = etree.tostring(root, encoding='utf-8', xml_declaration=True,
|
||||||
pretty_print=pretty_print)
|
pretty_print=pretty_print)
|
||||||
|
|
||||||
@ -287,7 +283,6 @@ def xml2str(root, pretty_print=False, strip_comments=False):
|
|||||||
|
|
||||||
|
|
||||||
def xml2unicode(root, pretty_print=False):
|
def xml2unicode(root, pretty_print=False):
|
||||||
_prepare_xml_for_serialization(root)
|
|
||||||
return etree.tostring(root, pretty_print=pretty_print)
|
return etree.tostring(root, pretty_print=pretty_print)
|
||||||
|
|
||||||
ASCII_CHARS = set(chr(x) for x in xrange(128))
|
ASCII_CHARS = set(chr(x) for x in xrange(128))
|
||||||
@ -321,6 +316,25 @@ def urlnormalize(href):
|
|||||||
parts = (urlquote(part) for part in parts)
|
parts = (urlquote(part) for part in parts)
|
||||||
return urlunparse(parts)
|
return urlunparse(parts)
|
||||||
|
|
||||||
|
class DummyHandler(logging.Handler):
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
logging.Handler.__init__(self, logging.WARNING)
|
||||||
|
self.setFormatter(logging.Formatter('%(message)s'))
|
||||||
|
self.log = None
|
||||||
|
|
||||||
|
def emit(self, record):
|
||||||
|
if self.log is not None:
|
||||||
|
msg = self.format(record)
|
||||||
|
f = self.log.error if record.levelno >= logging.ERROR \
|
||||||
|
else self.log.warn
|
||||||
|
f(msg)
|
||||||
|
|
||||||
|
|
||||||
|
_css_logger = logging.getLogger('calibre.css')
|
||||||
|
_css_logger.setLevel(logging.WARNING)
|
||||||
|
_css_log_handler = DummyHandler()
|
||||||
|
_css_logger.addHandler(_css_log_handler)
|
||||||
|
|
||||||
class OEBError(Exception):
|
class OEBError(Exception):
|
||||||
"""Generic OEB-processing error."""
|
"""Generic OEB-processing error."""
|
||||||
@ -778,7 +792,8 @@ class Manifest(object):
|
|||||||
data = self.oeb.css_preprocessor(data)
|
data = self.oeb.css_preprocessor(data)
|
||||||
data = XHTML_CSS_NAMESPACE + data
|
data = XHTML_CSS_NAMESPACE + data
|
||||||
parser = CSSParser(loglevel=logging.WARNING,
|
parser = CSSParser(loglevel=logging.WARNING,
|
||||||
fetcher=self._fetch_css)
|
fetcher=self._fetch_css,
|
||||||
|
log=_css_logger)
|
||||||
data = parser.parseString(data, href=self.href)
|
data = parser.parseString(data, href=self.href)
|
||||||
data.namespaces['h'] = XHTML_NS
|
data.namespaces['h'] = XHTML_NS
|
||||||
return data
|
return data
|
||||||
@ -1435,7 +1450,7 @@ class OEBBook(object):
|
|||||||
:attr:`pages`: List of "pages," such as indexed to a print edition of
|
:attr:`pages`: List of "pages," such as indexed to a print edition of
|
||||||
the same text.
|
the same text.
|
||||||
"""
|
"""
|
||||||
|
_css_log_handler.log = logger
|
||||||
self.encoding = encoding
|
self.encoding = encoding
|
||||||
self.html_preprocessor = html_preprocessor
|
self.html_preprocessor = html_preprocessor
|
||||||
self.css_preprocessor = css_preprocessor
|
self.css_preprocessor = css_preprocessor
|
||||||
@ -1450,6 +1465,7 @@ class OEBBook(object):
|
|||||||
self.guide = Guide(self)
|
self.guide = Guide(self)
|
||||||
self.toc = TOC()
|
self.toc = TOC()
|
||||||
self.pages = PageList()
|
self.pages = PageList()
|
||||||
|
self.auto_generated_toc = True
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def generate(cls, opts):
|
def generate(cls, opts):
|
||||||
|
@ -13,13 +13,12 @@ from PyQt4.Qt import QFontDatabase
|
|||||||
|
|
||||||
from calibre.customize.ui import available_input_formats
|
from calibre.customize.ui import available_input_formats
|
||||||
from calibre.ebooks.epub.from_html import TITLEPAGE
|
from calibre.ebooks.epub.from_html import TITLEPAGE
|
||||||
from calibre.ebooks.metadata.opf2 import OPF, OPFCreator
|
from calibre.ebooks.metadata.opf2 import OPF
|
||||||
from calibre.ptempfile import TemporaryDirectory
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
from calibre.ebooks.chardet import xml_to_unicode
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
from calibre.utils.zipfile import safe_replace, ZipFile
|
from calibre.utils.zipfile import safe_replace, ZipFile
|
||||||
from calibre.utils.config import DynamicConfig
|
from calibre.utils.config import DynamicConfig
|
||||||
from calibre.utils.logging import Log
|
from calibre.utils.logging import Log
|
||||||
from calibre import CurrentDir
|
|
||||||
|
|
||||||
def character_count(html):
|
def character_count(html):
|
||||||
'''
|
'''
|
||||||
@ -57,31 +56,21 @@ class FakeOpts(object):
|
|||||||
max_levels = 5
|
max_levels = 5
|
||||||
input_encoding = None
|
input_encoding = None
|
||||||
|
|
||||||
def html2opf(path, tdir, log):
|
|
||||||
from calibre.ebooks.html.input import get_filelist
|
|
||||||
from calibre.ebooks.metadata.meta import get_metadata
|
|
||||||
with CurrentDir(tdir):
|
|
||||||
fl = get_filelist(path, tdir, FakeOpts(), log)
|
|
||||||
mi = get_metadata(open(path, 'rb'), 'html')
|
|
||||||
mi = OPFCreator(os.getcwdu(), mi)
|
|
||||||
mi.guide = None
|
|
||||||
entries = [(f.path, 'application/xhtml+xml') for f in fl]
|
|
||||||
mi.create_manifest(entries)
|
|
||||||
mi.create_spine([f.path for f in fl])
|
|
||||||
|
|
||||||
mi.render(open('metadata.opf', 'wb'))
|
|
||||||
opfpath = os.path.abspath('metadata.opf')
|
|
||||||
|
|
||||||
return opfpath
|
|
||||||
|
|
||||||
def opf2opf(path, tdir, opts):
|
|
||||||
return path
|
|
||||||
|
|
||||||
def is_supported(path):
|
def is_supported(path):
|
||||||
ext = os.path.splitext(path)[1].replace('.', '').lower()
|
ext = os.path.splitext(path)[1].replace('.', '').lower()
|
||||||
ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
|
ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
|
||||||
return ext in available_input_formats()
|
return ext in available_input_formats()
|
||||||
|
|
||||||
|
|
||||||
|
def write_oebbook(oeb, path):
|
||||||
|
from calibre.ebooks.oeb.writer import OEBWriter
|
||||||
|
from calibre import walk
|
||||||
|
w = OEBWriter()
|
||||||
|
w(oeb, path)
|
||||||
|
for f in walk(path):
|
||||||
|
if f.endswith('.opf'):
|
||||||
|
return f
|
||||||
|
|
||||||
class EbookIterator(object):
|
class EbookIterator(object):
|
||||||
|
|
||||||
CHARACTERS_PER_PAGE = 1000
|
CHARACTERS_PER_PAGE = 1000
|
||||||
@ -131,17 +120,16 @@ class EbookIterator(object):
|
|||||||
def __enter__(self):
|
def __enter__(self):
|
||||||
self._tdir = TemporaryDirectory('_ebook_iter')
|
self._tdir = TemporaryDirectory('_ebook_iter')
|
||||||
self.base = self._tdir.__enter__()
|
self.base = self._tdir.__enter__()
|
||||||
if self.ebook_ext == 'opf':
|
|
||||||
self.pathtoopf = self.pathtoebook
|
|
||||||
elif self.ebook_ext == 'html':
|
|
||||||
self.pathtoopf = html2opf(self.pathtoebook, self.base, self.log)
|
|
||||||
else:
|
|
||||||
from calibre.ebooks.conversion.plumber import Plumber
|
from calibre.ebooks.conversion.plumber import Plumber
|
||||||
plumber = Plumber(self.pathtoebook, self.base, self.log)
|
plumber = Plumber(self.pathtoebook, self.base, self.log)
|
||||||
plumber.setup_options()
|
plumber.setup_options()
|
||||||
|
if hasattr(plumber.opts, 'dont_package'):
|
||||||
|
plumber.opts.dont_package = True
|
||||||
self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
|
self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
|
||||||
plumber.opts, plumber.input_fmt, self.log,
|
plumber.opts, plumber.input_fmt, self.log,
|
||||||
{}, self.base)
|
{}, self.base)
|
||||||
|
if hasattr(self.pathtoopf, 'manifest'):
|
||||||
|
self.pathtoopf = write_oebbook(self.pathtoebook, self._tdir)
|
||||||
|
|
||||||
|
|
||||||
self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
|
self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
|
||||||
|
@ -16,7 +16,6 @@ class OEBOutput(OutputFormatPlugin):
|
|||||||
author = 'Kovid Goyal'
|
author = 'Kovid Goyal'
|
||||||
file_type = 'oeb'
|
file_type = 'oeb'
|
||||||
|
|
||||||
|
|
||||||
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
||||||
self.log, self.opts = log, opts
|
self.log, self.opts = log, opts
|
||||||
if not os.path.exists(output_path):
|
if not os.path.exists(output_path):
|
||||||
|
@ -349,6 +349,7 @@ class OEBReader(object):
|
|||||||
def _toc_from_ncx(self, item):
|
def _toc_from_ncx(self, item):
|
||||||
if item is None:
|
if item is None:
|
||||||
return False
|
return False
|
||||||
|
self.log.debug('Reading TOC from NCX...')
|
||||||
ncx = item.data
|
ncx = item.data
|
||||||
title = ''.join(xpath(ncx, 'ncx:docTitle/ncx:text/text()'))
|
title = ''.join(xpath(ncx, 'ncx:docTitle/ncx:text/text()'))
|
||||||
title = COLLAPSE_RE.sub(' ', title.strip())
|
title = COLLAPSE_RE.sub(' ', title.strip())
|
||||||
@ -364,6 +365,7 @@ class OEBReader(object):
|
|||||||
result = xpath(opf, 'o2:tours/o2:tour')
|
result = xpath(opf, 'o2:tours/o2:tour')
|
||||||
if not result:
|
if not result:
|
||||||
return False
|
return False
|
||||||
|
self.log.debug('Reading TOC from tour...')
|
||||||
tour = result[0]
|
tour = result[0]
|
||||||
toc = self.oeb.toc
|
toc = self.oeb.toc
|
||||||
toc.title = tour.get('title')
|
toc.title = tour.get('title')
|
||||||
@ -384,6 +386,7 @@ class OEBReader(object):
|
|||||||
def _toc_from_html(self, opf):
|
def _toc_from_html(self, opf):
|
||||||
if 'toc' not in self.oeb.guide:
|
if 'toc' not in self.oeb.guide:
|
||||||
return False
|
return False
|
||||||
|
self.log.debug('Reading TOC from HTML...')
|
||||||
itempath, frag = urldefrag(self.oeb.guide['toc'].href)
|
itempath, frag = urldefrag(self.oeb.guide['toc'].href)
|
||||||
item = self.oeb.manifest.hrefs[itempath]
|
item = self.oeb.manifest.hrefs[itempath]
|
||||||
html = item.data
|
html = item.data
|
||||||
@ -414,6 +417,7 @@ class OEBReader(object):
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
def _toc_from_spine(self, opf):
|
def _toc_from_spine(self, opf):
|
||||||
|
self.log.warn('Generating default TOC from spine...')
|
||||||
toc = self.oeb.toc
|
toc = self.oeb.toc
|
||||||
titles = []
|
titles = []
|
||||||
headers = []
|
headers = []
|
||||||
@ -441,11 +445,14 @@ class OEBReader(object):
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
def _toc_from_opf(self, opf, item):
|
def _toc_from_opf(self, opf, item):
|
||||||
|
self.oeb.auto_generated_toc = False
|
||||||
if self._toc_from_ncx(item): return
|
if self._toc_from_ncx(item): return
|
||||||
if self._toc_from_tour(opf): return
|
# Prefer HTML to tour based TOC, since several LIT files
|
||||||
self.logger.warn('No metadata table of contents found')
|
# have good HTML TOCs but bad tour based TOCs
|
||||||
if self._toc_from_html(opf): return
|
if self._toc_from_html(opf): return
|
||||||
|
if self._toc_from_tour(opf): return
|
||||||
self._toc_from_spine(opf)
|
self._toc_from_spine(opf)
|
||||||
|
self.oeb.auto_generated_toc = True
|
||||||
|
|
||||||
def _pages_from_ncx(self, opf, item):
|
def _pages_from_ncx(self, opf, item):
|
||||||
if item is None:
|
if item is None:
|
||||||
|
@ -51,8 +51,8 @@ class Split(object):
|
|||||||
self.log = oeb.log
|
self.log = oeb.log
|
||||||
self.map = {}
|
self.map = {}
|
||||||
self.page_break_selectors = None
|
self.page_break_selectors = None
|
||||||
for item in self.oeb.manifest.items:
|
for item in list(self.oeb.manifest.items):
|
||||||
if etree.iselement(item.data):
|
if item.spine_position is not None and etree.iselement(item.data):
|
||||||
self.split_item(item)
|
self.split_item(item)
|
||||||
|
|
||||||
self.fix_links()
|
self.fix_links()
|
||||||
@ -74,7 +74,6 @@ class Split(object):
|
|||||||
self.page_break_selectors = set([])
|
self.page_break_selectors = set([])
|
||||||
stylesheets = [x.data for x in self.oeb.manifest if x.media_type in
|
stylesheets = [x.data for x in self.oeb.manifest if x.media_type in
|
||||||
OEB_STYLES]
|
OEB_STYLES]
|
||||||
page_break_selectors = set([])
|
|
||||||
for rule in rules(stylesheets):
|
for rule in rules(stylesheets):
|
||||||
before = getattr(rule.style.getPropertyCSSValue(
|
before = getattr(rule.style.getPropertyCSSValue(
|
||||||
'page-break-before'), 'cssText', '').strip().lower()
|
'page-break-before'), 'cssText', '').strip().lower()
|
||||||
@ -82,20 +81,24 @@ class Split(object):
|
|||||||
'page-break-after'), 'cssText', '').strip().lower()
|
'page-break-after'), 'cssText', '').strip().lower()
|
||||||
try:
|
try:
|
||||||
if before and before != 'avoid':
|
if before and before != 'avoid':
|
||||||
page_break_selectors.add((CSSSelector(rule.selectorText),
|
self.page_break_selectors.add((CSSSelector(rule.selectorText),
|
||||||
True))
|
True))
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
try:
|
try:
|
||||||
if after and after != 'avoid':
|
if after and after != 'avoid':
|
||||||
page_break_selectors.add((CSSSelector(rule.selectorText),
|
self.page_break_selectors.add((CSSSelector(rule.selectorText),
|
||||||
False))
|
False))
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
page_breaks = set([])
|
page_breaks = set([])
|
||||||
for selector, before in page_break_selectors:
|
for selector, before in self.page_break_selectors:
|
||||||
for elem in selector(item.data):
|
body = item.data.xpath('//h:body', namespaces=NAMESPACES)
|
||||||
|
if not body:
|
||||||
|
continue
|
||||||
|
for elem in selector(body[0]):
|
||||||
|
if elem not in body:
|
||||||
if before:
|
if before:
|
||||||
elem.set('pb_before', '1')
|
elem.set('pb_before', '1')
|
||||||
page_breaks.add(elem)
|
page_breaks.add(elem)
|
||||||
@ -136,8 +139,10 @@ class Split(object):
|
|||||||
if href in self.map:
|
if href in self.map:
|
||||||
anchor_map = self.map[href]
|
anchor_map = self.map[href]
|
||||||
nhref = anchor_map[frag if frag else None]
|
nhref = anchor_map[frag if frag else None]
|
||||||
|
nhref = self.current_item.relhref(nhref)
|
||||||
if frag:
|
if frag:
|
||||||
nhref = '#'.join(href, frag)
|
nhref = '#'.join((nhref, frag))
|
||||||
|
|
||||||
return nhref
|
return nhref
|
||||||
return url
|
return url
|
||||||
|
|
||||||
@ -153,7 +158,7 @@ class FlowSplitter(object):
|
|||||||
self.page_breaks = page_breaks
|
self.page_breaks = page_breaks
|
||||||
self.page_break_ids = page_break_ids
|
self.page_break_ids = page_break_ids
|
||||||
self.max_flow_size = max_flow_size
|
self.max_flow_size = max_flow_size
|
||||||
self.base = item.abshref(item.href)
|
self.base = item.href
|
||||||
|
|
||||||
base, ext = os.path.splitext(self.base)
|
base, ext = os.path.splitext(self.base)
|
||||||
self.base = base.replace('%', '%%')+'_split_%d'+ext
|
self.base = base.replace('%', '%%')+'_split_%d'+ext
|
||||||
@ -192,9 +197,9 @@ class FlowSplitter(object):
|
|||||||
self.trees = []
|
self.trees = []
|
||||||
tree = orig_tree
|
tree = orig_tree
|
||||||
for pattern, before in ordered_ids:
|
for pattern, before in ordered_ids:
|
||||||
self.log.debug('\t\tSplitting on page-break')
|
|
||||||
elem = pattern(tree)
|
elem = pattern(tree)
|
||||||
if elem:
|
if elem:
|
||||||
|
self.log.debug('\t\tSplitting on page-break')
|
||||||
before, after = self.do_split(tree, elem[0], before)
|
before, after = self.do_split(tree, elem[0], before)
|
||||||
self.trees.append(before)
|
self.trees.append(before)
|
||||||
tree = after
|
tree = after
|
||||||
@ -414,13 +419,14 @@ class FlowSplitter(object):
|
|||||||
elem.attrib.pop(SPLIT_ATTR, None)
|
elem.attrib.pop(SPLIT_ATTR, None)
|
||||||
elem.attrib.pop(SPLIT_POINT_ATTR, '0')
|
elem.attrib.pop(SPLIT_POINT_ATTR, '0')
|
||||||
|
|
||||||
spine_pos = self.item.spine_pos
|
spine_pos = self.item.spine_position
|
||||||
for current, tree in zip(map(reversed, (self.files, self.trees))):
|
for current, tree in zip(*map(reversed, (self.files, self.trees))):
|
||||||
for a in tree.getroot().xpath('//h:a[@href]', namespaces=NAMESPACES):
|
for a in tree.getroot().xpath('//h:a[@href]', namespaces=NAMESPACES):
|
||||||
href = a.get('href').strip()
|
href = a.get('href').strip()
|
||||||
if href.startswith('#'):
|
if href.startswith('#'):
|
||||||
anchor = href[1:]
|
anchor = href[1:]
|
||||||
file = self.anchor_map[anchor]
|
file = self.anchor_map[anchor]
|
||||||
|
file = self.item.relhref(file)
|
||||||
if file != current:
|
if file != current:
|
||||||
a.set('href', file+href)
|
a.set('href', file+href)
|
||||||
|
|
||||||
@ -430,12 +436,12 @@ class FlowSplitter(object):
|
|||||||
self.oeb.spine.insert(spine_pos, new_item, self.item.linear)
|
self.oeb.spine.insert(spine_pos, new_item, self.item.linear)
|
||||||
|
|
||||||
if self.oeb.guide:
|
if self.oeb.guide:
|
||||||
for ref in self.oeb.guide:
|
for ref in self.oeb.guide.values():
|
||||||
href, frag = urldefrag(ref.href)
|
href, frag = urldefrag(ref.href)
|
||||||
if href == self.item.href:
|
if href == self.item.href:
|
||||||
nhref = self.anchor_map[frag if frag else None]
|
nhref = self.anchor_map[frag if frag else None]
|
||||||
if frag:
|
if frag:
|
||||||
nhref = '#'.join(nhref, frag)
|
nhref = '#'.join((nhref, frag))
|
||||||
ref.href = nhref
|
ref.href = nhref
|
||||||
|
|
||||||
def fix_toc_entry(toc):
|
def fix_toc_entry(toc):
|
||||||
@ -444,7 +450,7 @@ class FlowSplitter(object):
|
|||||||
if href == self.item.href:
|
if href == self.item.href:
|
||||||
nhref = self.anchor_map[frag if frag else None]
|
nhref = self.anchor_map[frag if frag else None]
|
||||||
if frag:
|
if frag:
|
||||||
nhref = '#'.join(nhref, frag)
|
nhref = '#'.join((nhref, frag))
|
||||||
toc.href = nhref
|
toc.href = nhref
|
||||||
for x in toc:
|
for x in toc:
|
||||||
fix_toc_entry(x)
|
fix_toc_entry(x)
|
||||||
|
@ -49,7 +49,7 @@ class OEBWriter(object):
|
|||||||
|
|
||||||
def __call__(self, oeb, path):
|
def __call__(self, oeb, path):
|
||||||
"""
|
"""
|
||||||
Read the book in the :class:`OEBBook` object :param:`oeb` to a file
|
Write the book in the :class:`OEBBook` object :param:`oeb` to a folder
|
||||||
at :param:`path`.
|
at :param:`path`.
|
||||||
"""
|
"""
|
||||||
version = int(self.version[0])
|
version = int(self.version[0])
|
||||||
|
Loading…
x
Reference in New Issue
Block a user