Added LIT input plugin. The ported splitting code now works (at least on the handful of files I've tested).

This commit is contained in:
Kovid Goyal 2009-04-18 01:01:18 -07:00
parent b9f80aa229
commit 3e29dfbe56
13 changed files with 209 additions and 137 deletions

View File

@ -263,14 +263,14 @@ class MOBIMetadataWriter(MetadataWriterPlugin):
def set_metadata(self, stream, mi, type):
from calibre.ebooks.metadata.mobi import set_metadata
set_metadata(stream, mi)
class PDFMetadataWriter(MetadataWriterPlugin):
    '''
    Metadata writer plugin for PDF files.

    The actual work is delegated to
    :func:`calibre.ebooks.metadata.pdf.set_metadata`, which is imported
    only when the plugin is invoked.
    '''

    name        = 'Set PDF metadata'
    file_types  = set(['pdf'])
    description = _('Set metadata in %s files') % 'PDF'
    author      = 'John Schember'

    def set_metadata(self, stream, mi, type):
        '''
        Write the metadata `mi` into the PDF in `stream`.

        `type` is accepted for interface compatibility and is not used here.
        '''
        from calibre.ebooks.metadata.pdf import set_metadata as write_pdf_metadata
        write_pdf_metadata(stream, mi)
@ -280,6 +280,7 @@ from calibre.ebooks.epub.input import EPUBInput
from calibre.ebooks.mobi.input import MOBIInput
from calibre.ebooks.pdf.input import PDFInput
from calibre.ebooks.txt.input import TXTInput
from calibre.ebooks.lit.input import LITInput
from calibre.ebooks.html.input import HTMLInput
from calibre.ebooks.oeb.output import OEBOutput
from calibre.ebooks.txt.output import TXTOutput
@ -287,7 +288,7 @@ from calibre.ebooks.pdf.output import PDFOutput
from calibre.customize.profiles import input_profiles, output_profiles
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDFInput, HTMLInput,
TXTInput, OEBOutput, TXTOutput, PDFOutput]
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
x.__name__.endswith('MetadataReader')]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \

View File

@ -41,6 +41,11 @@ class ConversionOption(object):
def __eq__(self, other):
return hash(self) == hash(other)
def clone(self):
    '''
    Return a new ConversionOption built from this option's name, help,
    long_switch, short_switch and choices.

    The attribute values are passed through as-is (not copied).
    '''
    settings = dict(
        name=self.name,
        help=self.help,
        long_switch=self.long_switch,
        short_switch=self.short_switch,
        choices=self.choices,
    )
    return ConversionOption(**settings)
class OptionRecommendation(object):
LOW = 1
MED = 2
@ -59,6 +64,10 @@ class OptionRecommendation(object):
self.validate_parameters()
def clone(self):
    '''
    Return a new OptionRecommendation with the same recommended value and
    level, wrapping a clone of the underlying option.
    '''
    option_copy = self.option.clone()
    return OptionRecommendation(
        recommended_value=self.recommended_value,
        level=self.level,
        option=option_copy,
    )
def validate_parameters(self):
if self.option.choices and self.recommended_value not in \
self.option.choices:
@ -170,8 +179,14 @@ class InputFormatPlugin(Plugin):
options.debug_input = os.path.abspath(options.debug_input)
if not os.path.exists(options.debug_input):
os.makedirs(options.debug_input)
shutil.rmtree(options.debug_input)
shutil.copytree(output_dir, options.debug_input)
if isinstance(ret, basestring):
shutil.rmtree(options.debug_input)
shutil.copytree(output_dir, options.debug_input)
else:
from calibre.ebooks.oeb.writer import OEBWriter
w = OEBWriter(pretty_print=options.pretty_print)
w(ret, options.debug_input)
log.info('Input debug saved to:', options.debug_input)
return ret

View File

@ -57,7 +57,7 @@ def check_command_line_options(parser, args, log):
raise SystemExit(1)
output = args[2]
if output.startswith('.'):
if output.startswith('.') and output != '.':
output = os.path.splitext(os.path.basename(input))[0]+output
output = os.path.abspath(output)
@ -171,7 +171,8 @@ def main(args=sys.argv):
plumber.run()
log(_('Output saved to'), ' ', plumber.output)
if plumber.opts.debug_input is None:
log(_('Output saved to'), ' ', plumber.output)
return 0

View File

@ -32,8 +32,8 @@ class Plumber(object):
:param input: Path to input file.
:param output: Path to output file/directory
'''
self.input = input
self.output = output
self.input = os.path.abspath(input)
self.output = os.path.abspath(output)
self.log = log
# Initialize the conversion options that are independent of input and
@ -188,15 +188,15 @@ OptionRecommendation(name='language',
]
input_fmt = os.path.splitext(input)[1]
input_fmt = os.path.splitext(self.input)[1]
if not input_fmt:
raise ValueError('Input file must have an extension')
input_fmt = input_fmt[1:].lower()
if os.path.exists(output) and os.path.isdir(output):
if os.path.exists(self.output) and os.path.isdir(self.output):
output_fmt = 'oeb'
else:
output_fmt = os.path.splitext(output)[1]
output_fmt = os.path.splitext(self.output)[1]
if not output_fmt:
output_fmt = '.oeb'
output_fmt = output_fmt[1:].lower()
@ -323,6 +323,9 @@ OptionRecommendation(name='language',
self.oeb = self.input_plugin(open(self.input, 'rb'), self.opts,
self.input_fmt, self.log,
accelerators, tdir)
if self.opts.debug_input is not None:
self.log('Debug input called, aborting the rest of the pipeline.')
return
if not hasattr(self.oeb, 'manifest'):
self.oeb = create_oebbook(self.log, self.oeb, self.opts)
@ -365,18 +368,20 @@ OptionRecommendation(name='language',
self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
self.opts, self.log)
def create_oebbook(log, opfpath, opts):
def create_oebbook(log, path_or_stream, opts, reader=None):
'''
Create an OEBBook from an OPF file.
Create an OEBBook.
'''
from calibre.ebooks.oeb.reader import OEBReader
from calibre.ebooks.oeb.base import OEBBook
html_preprocessor = HTMLPreProcessor()
reader = OEBReader()
oeb = OEBBook(log, html_preprocessor=html_preprocessor,
pretty_print=opts.pretty_print)
# Read OEB Book into OEBBook
log.info('Parsing all content...')
reader(oeb, opfpath)
log('Parsing all content...')
if reader is None:
from calibre.ebooks.oeb.reader import OEBReader
reader = OEBReader
reader()(oeb, path_or_stream)
return oeb

View File

@ -252,6 +252,14 @@ class HTMLInput(InputFormatPlugin):
)
),
# When this option is set, convert() returns the path to the generated OPF
# directly instead of building an OEBBook from it.
OptionRecommendation(name='dont_package',
    recommended_value=False, level=OptionRecommendation.LOW,
    help=_('Normally this input plugin re-arranges all the input '
        'files into a standard folder hierarchy. Only use this option '
        'if you know what you are doing as it can result in various '
        'nasty side effects in the rest of the conversion pipeline.'
    )
),
])
def convert(self, stream, opts, file_ext, log,
@ -276,6 +284,9 @@ class HTMLInput(InputFormatPlugin):
mi.render(open('metadata.opf', 'wb'))
opfpath = os.path.abspath('metadata.opf')
if opts.dont_package:
return opfpath
from calibre.ebooks.conversion.plumber import create_oebbook
oeb = create_oebbook(log, opfpath, opts)

View File

@ -0,0 +1,24 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.customize.conversion import InputFormatPlugin
class LITInput(InputFormatPlugin):
    '''
    Input plugin for LIT files.

    Conversion is done by handing the input stream to ``create_oebbook``
    with ``LitReader`` as the reader class.
    '''

    name        = 'LIT Input'
    author      = 'Marshall T. Vandegrift'
    description = 'Convert LIT files to HTML'
    file_types  = set(['lit'])

    def convert(self, stream, options, file_ext, log,
                accelerators):
        '''
        Build and return an OEB book from the LIT data in `stream`.

        `file_ext` and `accelerators` are part of the plugin interface and
        are not used by this implementation.
        '''
        # Imported lazily so that loading the plugin stays cheap.
        from calibre.ebooks.lit.reader import LitReader
        from calibre.ebooks.conversion.plumber import create_oebbook
        book = create_oebbook(log, stream, options, reader=LitReader)
        return book

View File

@ -7,13 +7,12 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> ' \
'and Marshall T. Vandegrift <llasram@gmail.com>'
import sys, struct, os
import struct, os
import functools
import re
from urlparse import urldefrag
from cStringIO import StringIO
from urllib import unquote as urlunquote
from lxml import etree
from calibre.ebooks.lit import LitError
from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
import calibre.ebooks.lit.mssha1 as mssha1
@ -29,12 +28,12 @@ __all__ = ["LitReader"]
XML_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
"""
OPF_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE package
<!DOCTYPE package
PUBLIC "+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Package//EN"
"http://openebook.org/dtds/oeb-1.0.1/oebpkg101.dtd">
"""
HTML_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE html PUBLIC
<!DOCTYPE html PUBLIC
"+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Document//EN"
"http://openebook.org/dtds/oeb-1.0.1/oebdoc101.dtd">
"""
@ -73,7 +72,7 @@ def encint(bytes, remaining):
val <<= 7
val |= (b & 0x7f)
if b & 0x80 == 0: break
return val, bytes[pos:], remaining
return val, bytes[pos:], remaining
def msguid(bytes):
values = struct.unpack("<LHHBBBBBBBB", bytes[:16])
@ -123,7 +122,7 @@ class UnBinary(object):
CLOSE_ANGLE_RE = re.compile(r'(?<!--)>>(?=>>|[^>])')
DOUBLE_ANGLE_RE = re.compile(r'([<>])\1')
EMPTY_ATOMS = ({},{})
def __init__(self, bin, path, manifest={}, map=HTML_MAP, atoms=EMPTY_ATOMS):
self.manifest = manifest
self.tag_map, self.attr_map, self.tag_to_attr_map = map
@ -143,7 +142,7 @@ class UnBinary(object):
raw = self.CLOSE_ANGLE_RE.sub(r'&gt;', raw)
raw = self.DOUBLE_ANGLE_RE.sub(r'\1', raw)
self.raw = raw
def item_path(self, internal_id):
try:
target = self.manifest[internal_id].path
@ -159,7 +158,7 @@ class UnBinary(object):
index += 1
relpath = (['..'] * (len(base) - index)) + target[index:]
return '/'.join(relpath)
def __unicode__(self):
return self.raw.decode('utf-8')
@ -172,11 +171,11 @@ class UnBinary(object):
in_censorship = is_goingdown = False
state = 'text'
flags = 0
while index < len(bin):
c, index = read_utf8_char(bin, index)
oc = ord(c)
if state == 'text':
if oc == 0:
state = 'get flags'
@ -188,14 +187,14 @@ class UnBinary(object):
elif c == '<':
c = '<<'
buf.write(encode(c))
elif state == 'get flags':
if oc == 0:
state = 'text'
continue
flags = oc
state = 'get tag'
elif state == 'get tag':
state = 'text' if oc == 0 else 'get attr'
if flags & FLAG_OPENING:
@ -226,7 +225,7 @@ class UnBinary(object):
if depth == 0:
raise LitError('Extra closing tag')
return index
elif state == 'get attr':
in_censorship = False
if oc == 0:
@ -265,7 +264,7 @@ class UnBinary(object):
state = 'get href length'
else:
state = 'get value length'
elif state == 'get value length':
if not in_censorship:
buf.write('"')
@ -281,7 +280,7 @@ class UnBinary(object):
continue
if count < 0 or count > (len(bin) - index):
raise LitError('Invalid character count %d' % count)
elif state == 'get value':
if count == 0xfffe:
if not in_censorship:
@ -301,7 +300,7 @@ class UnBinary(object):
buf.write('"')
in_censorship = False
state = 'get attr'
elif state == 'get custom length':
count = oc - 1
if count <= 0 or count > len(bin)-index:
@ -309,21 +308,21 @@ class UnBinary(object):
dynamic_tag += 1
state = 'get custom'
tag_name = ''
elif state == 'get custom':
tag_name += c
count -= 1
if count == 0:
buf.write(encode(tag_name))
state = 'get attr'
elif state == 'get attr length':
count = oc - 1
if count <= 0 or count > (len(bin) - index):
raise LitError('Invalid character count %d' % count)
buf.write(' ')
state = 'get custom attr'
elif state == 'get custom attr':
buf.write(encode(c))
count -= 1
@ -337,7 +336,7 @@ class UnBinary(object):
raise LitError('Invalid character count %d' % count)
href = ''
state = 'get href'
elif state == 'get href':
href += c
count -= 1
@ -350,7 +349,7 @@ class UnBinary(object):
buf.write(encode(u'"%s"' % path))
state = 'get attr'
return index
class DirectoryEntry(object):
def __init__(self, name, section, offset, size):
@ -358,11 +357,11 @@ class DirectoryEntry(object):
self.section = section
self.offset = offset
self.size = size
def __repr__(self):
return "DirectoryEntry(name=%s, section=%d, offset=%d, size=%d)" \
% (repr(self.name), self.section, self.offset, self.size)
def __str__(self):
return repr(self)
@ -382,12 +381,12 @@ class ManifestItem(object):
path = os.path.normpath(path).replace('\\', '/')
while path.startswith('../'): path = path[3:]
self.path = path
def __eq__(self, other):
if hasattr(other, 'internal'):
return self.internal == other.internal
return self.internal == other
def __repr__(self):
return "ManifestItem(internal=%r, path=%r, mime_type=%r, " \
"offset=%d, root=%r, state=%r)" \
@ -404,7 +403,7 @@ def preserve(function):
self.stream.seek(opos)
functools.update_wrapper(wrapper, function)
return wrapper
class LitFile(object):
PIECE_SIZE = 16
@ -438,14 +437,14 @@ class LitFile(object):
return self.stream.read(8)
return property(fget=fget)
magic = magic()
def version():
def fget(self):
self.stream.seek(8)
return u32(self.stream.read(4))
return property(fget=fget)
version = version()
def hdr_len():
@preserve
def fget(self):
@ -453,7 +452,7 @@ class LitFile(object):
return int32(self.stream.read(4))
return property(fget=fget)
hdr_len = hdr_len()
def num_pieces():
@preserve
def fget(self):
@ -461,7 +460,7 @@ class LitFile(object):
return int32(self.stream.read(4))
return property(fget=fget)
num_pieces = num_pieces()
def sec_hdr_len():
@preserve
def fget(self):
@ -469,7 +468,7 @@ class LitFile(object):
return int32(self.stream.read(4))
return property(fget=fget)
sec_hdr_len = sec_hdr_len()
def guid():
@preserve
def fget(self):
@ -477,7 +476,7 @@ class LitFile(object):
return self.stream.read(16)
return property(fget=fget)
guid = guid()
def header():
@preserve
def fget(self):
@ -488,7 +487,7 @@ class LitFile(object):
return self.stream.read(size)
return property(fget=fget)
header = header()
@preserve
def __len__(self):
self.stream.seek(0, 2)
@ -501,7 +500,7 @@ class LitFile(object):
def read_content(self, offset, size):
return self.read_raw(self.content_offset + offset, size)
def read_secondary_header(self):
offset = self.hdr_len + (self.num_pieces * self.PIECE_SIZE)
bytes = self.read_raw(offset, self.sec_hdr_len)
@ -526,12 +525,12 @@ class LitFile(object):
if u32(bytes[offset+4+16:]):
raise LitError('This file has a 64bit content offset')
self.content_offset = u32(bytes[offset+16:])
self.timestamp = u32(bytes[offset+24:])
self.timestamp = u32(bytes[offset+24:])
self.language_id = u32(bytes[offset+28:])
offset += 48
if not hasattr(self, 'content_offset'):
raise LitError('Could not figure out the content offset')
def read_header_pieces(self):
src = self.header[self.hdr_len:]
for i in xrange(self.num_pieces):
@ -556,7 +555,7 @@ class LitFile(object):
self.piece3_guid = piece
elif i == 4:
self.piece4_guid = piece
def read_directory(self, piece):
if not piece.startswith('IFCM'):
raise LitError('Header piece #1 is not main directory.')
@ -760,9 +759,9 @@ class LitFile(object):
raise LitError("Reset table is too short")
if u32(reset_table[RESET_UCLENGTH + 4:]) != 0:
raise LitError("Reset table has 64bit value for UCLENGTH")
result = []
window_size = 14
u = u32(control[CONTROL_WINDOW_SIZE:])
while u > 0:
@ -847,13 +846,13 @@ class LitContainer(object):
def __init__(self, filename_or_stream):
self._litfile = LitFile(filename_or_stream)
def namelist(self):
return self._litfile.paths.keys()
def exists(self, name):
return urlunquote(name) in self._litfile.paths
def read(self, name):
entry = self._litfile.paths[urlunquote(name)] if name else None
if entry is None:
@ -869,7 +868,7 @@ class LitContainer(object):
internal = '/'.join(('/data', entry.internal))
content = self._litfile.get_file(internal)
return content
def _read_meta(self):
path = 'content.opf'
raw = self._litfile.get_file('/meta')

View File

@ -272,11 +272,7 @@ def XPath(expr):
def xpath(elem, expr):
    '''
    Evaluate the XPath expression `expr` against `elem`, using XPNSMAP as
    the namespace prefix map, and return the result.
    '''
    matches = elem.xpath(expr, namespaces=XPNSMAP)
    return matches
def _prepare_xml_for_serialization(root):
pass
def xml2str(root, pretty_print=False, strip_comments=False):
_prepare_xml_for_serialization(root)
ans = etree.tostring(root, encoding='utf-8', xml_declaration=True,
pretty_print=pretty_print)
@ -287,7 +283,6 @@ def xml2str(root, pretty_print=False, strip_comments=False):
def xml2unicode(root, pretty_print=False):
_prepare_xml_for_serialization(root)
return etree.tostring(root, pretty_print=pretty_print)
ASCII_CHARS = set(chr(x) for x in xrange(128))
@ -321,6 +316,25 @@ def urlnormalize(href):
parts = (urlquote(part) for part in parts)
return urlunparse(parts)
class DummyHandler(logging.Handler):
    '''
    Logging handler that forwards records to another log object.

    Records are dropped while :attr:`log` is None. Once a log object is
    assigned, records at ERROR level or above are sent to its ``error``
    method and everything else to its ``warn`` method.
    '''

    def __init__(self):
        # Handler threshold is WARNING; only the bare message is formatted.
        logging.Handler.__init__(self, logging.WARNING)
        self.setFormatter(logging.Formatter('%(message)s'))
        self.log = None

    def emit(self, record):
        target = self.log
        if target is None:
            return
        text = self.format(record)
        if record.levelno >= logging.ERROR:
            target.error(text)
        else:
            target.warn(text)
# Logger handed to CSSParser (via its `log` argument) when stylesheets are
# parsed; only WARNING and above are let through.
_css_logger = logging.getLogger('calibre.css')
_css_logger.setLevel(logging.WARNING)
# Single module-wide handler. Its `log` attribute starts out as None and is
# assigned when an OEBBook is constructed, so records emitted before that
# are discarded.
_css_log_handler = DummyHandler()
_css_logger.addHandler(_css_log_handler)
class OEBError(Exception):
"""Generic OEB-processing error."""
@ -778,7 +792,8 @@ class Manifest(object):
data = self.oeb.css_preprocessor(data)
data = XHTML_CSS_NAMESPACE + data
parser = CSSParser(loglevel=logging.WARNING,
fetcher=self._fetch_css)
fetcher=self._fetch_css,
log=_css_logger)
data = parser.parseString(data, href=self.href)
data.namespaces['h'] = XHTML_NS
return data
@ -1435,7 +1450,7 @@ class OEBBook(object):
:attr:`pages`: List of "pages," such as indexed to a print edition of
the same text.
"""
_css_log_handler.log = logger
self.encoding = encoding
self.html_preprocessor = html_preprocessor
self.css_preprocessor = css_preprocessor
@ -1450,6 +1465,7 @@ class OEBBook(object):
self.guide = Guide(self)
self.toc = TOC()
self.pages = PageList()
self.auto_generated_toc = True
@classmethod
def generate(cls, opts):

View File

@ -13,13 +13,12 @@ from PyQt4.Qt import QFontDatabase
from calibre.customize.ui import available_input_formats
from calibre.ebooks.epub.from_html import TITLEPAGE
from calibre.ebooks.metadata.opf2 import OPF, OPFCreator
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.zipfile import safe_replace, ZipFile
from calibre.utils.config import DynamicConfig
from calibre.utils.logging import Log
from calibre import CurrentDir
def character_count(html):
'''
@ -57,31 +56,21 @@ class FakeOpts(object):
max_levels = 5
input_encoding = None
def html2opf(path, tdir, log):
from calibre.ebooks.html.input import get_filelist
from calibre.ebooks.metadata.meta import get_metadata
with CurrentDir(tdir):
fl = get_filelist(path, tdir, FakeOpts(), log)
mi = get_metadata(open(path, 'rb'), 'html')
mi = OPFCreator(os.getcwdu(), mi)
mi.guide = None
entries = [(f.path, 'application/xhtml+xml') for f in fl]
mi.create_manifest(entries)
mi.create_spine([f.path for f in fl])
mi.render(open('metadata.opf', 'wb'))
opfpath = os.path.abspath('metadata.opf')
return opfpath
def opf2opf(path, tdir, opts):
return path
def is_supported(path):
    '''
    Return True if the extension of `path` is one of the available input
    formats.

    The extension is lower-cased, stripped of dots, and htm/html/xhtm/xhtml
    style spellings are rewritten to ``html`` before the lookup.
    '''
    extension = os.path.splitext(path)[1]
    extension = extension.replace('.', '').lower()
    normalized = re.sub(r'(x{0,1})htm(l{0,1})', 'html', extension)
    return normalized in available_input_formats()
def write_oebbook(oeb, path):
    '''
    Write `oeb` to the folder `path` with OEBWriter and return the first
    path found under it that ends in ``.opf``.

    Returns None when no such file is found.
    '''
    from calibre.ebooks.oeb.writer import OEBWriter
    from calibre import walk
    OEBWriter()(oeb, path)
    for candidate in walk(path):
        if candidate.endswith('.opf'):
            return candidate
    return None
class EbookIterator(object):
CHARACTERS_PER_PAGE = 1000
@ -131,17 +120,16 @@ class EbookIterator(object):
def __enter__(self):
self._tdir = TemporaryDirectory('_ebook_iter')
self.base = self._tdir.__enter__()
if self.ebook_ext == 'opf':
self.pathtoopf = self.pathtoebook
elif self.ebook_ext == 'html':
self.pathtoopf = html2opf(self.pathtoebook, self.base, self.log)
else:
from calibre.ebooks.conversion.plumber import Plumber
plumber = Plumber(self.pathtoebook, self.base, self.log)
plumber.setup_options()
self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
plumber.opts, plumber.input_fmt, self.log,
{}, self.base)
from calibre.ebooks.conversion.plumber import Plumber
plumber = Plumber(self.pathtoebook, self.base, self.log)
plumber.setup_options()
if hasattr(plumber.opts, 'dont_package'):
plumber.opts.dont_package = True
self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
plumber.opts, plumber.input_fmt, self.log,
{}, self.base)
if hasattr(self.pathtoopf, 'manifest'):
self.pathtoopf = write_oebbook(self.pathtoebook, self._tdir)
self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))

View File

@ -16,7 +16,6 @@ class OEBOutput(OutputFormatPlugin):
author = 'Kovid Goyal'
file_type = 'oeb'
def convert(self, oeb_book, output_path, input_plugin, opts, log):
self.log, self.opts = log, opts
if not os.path.exists(output_path):

View File

@ -349,6 +349,7 @@ class OEBReader(object):
def _toc_from_ncx(self, item):
if item is None:
return False
self.log.debug('Reading TOC from NCX...')
ncx = item.data
title = ''.join(xpath(ncx, 'ncx:docTitle/ncx:text/text()'))
title = COLLAPSE_RE.sub(' ', title.strip())
@ -364,6 +365,7 @@ class OEBReader(object):
result = xpath(opf, 'o2:tours/o2:tour')
if not result:
return False
self.log.debug('Reading TOC from tour...')
tour = result[0]
toc = self.oeb.toc
toc.title = tour.get('title')
@ -384,6 +386,7 @@ class OEBReader(object):
def _toc_from_html(self, opf):
if 'toc' not in self.oeb.guide:
return False
self.log.debug('Reading TOC from HTML...')
itempath, frag = urldefrag(self.oeb.guide['toc'].href)
item = self.oeb.manifest.hrefs[itempath]
html = item.data
@ -414,6 +417,7 @@ class OEBReader(object):
return True
def _toc_from_spine(self, opf):
self.log.warn('Generating default TOC from spine...')
toc = self.oeb.toc
titles = []
headers = []
@ -441,11 +445,14 @@ class OEBReader(object):
return True
def _toc_from_opf(self, opf, item):
self.oeb.auto_generated_toc = False
if self._toc_from_ncx(item): return
if self._toc_from_tour(opf): return
self.logger.warn('No metadata table of contents found')
# Prefer HTML to tour based TOC, since several LIT files
# have good HTML TOCs but bad tour based TOCs
if self._toc_from_html(opf): return
if self._toc_from_tour(opf): return
self._toc_from_spine(opf)
self.oeb.auto_generated_toc = True
def _pages_from_ncx(self, opf, item):
if item is None:

View File

@ -51,8 +51,8 @@ class Split(object):
self.log = oeb.log
self.map = {}
self.page_break_selectors = None
for item in self.oeb.manifest.items:
if etree.iselement(item.data):
for item in list(self.oeb.manifest.items):
if item.spine_position is not None and etree.iselement(item.data):
self.split_item(item)
self.fix_links()
@ -74,31 +74,34 @@ class Split(object):
self.page_break_selectors = set([])
stylesheets = [x.data for x in self.oeb.manifest if x.media_type in
OEB_STYLES]
page_break_selectors = set([])
for rule in rules(stylesheets):
before = getattr(rule.style.getPropertyCSSValue(
'page-break-before'), 'cssText', '').strip().lower()
after = getattr(rule.style.getPropertyCSSValue(
'page-break-after'), 'cssText', '').strip().lower()
try:
if before and before != 'avoid':
page_break_selectors.add((CSSSelector(rule.selectorText),
True))
except:
pass
try:
if after and after != 'avoid':
page_break_selectors.add((CSSSelector(rule.selectorText),
False))
except:
pass
for rule in rules(stylesheets):
before = getattr(rule.style.getPropertyCSSValue(
'page-break-before'), 'cssText', '').strip().lower()
after = getattr(rule.style.getPropertyCSSValue(
'page-break-after'), 'cssText', '').strip().lower()
try:
if before and before != 'avoid':
self.page_break_selectors.add((CSSSelector(rule.selectorText),
True))
except:
pass
try:
if after and after != 'avoid':
self.page_break_selectors.add((CSSSelector(rule.selectorText),
False))
except:
pass
page_breaks = set([])
for selector, before in page_break_selectors:
for elem in selector(item.data):
if before:
elem.set('pb_before', '1')
page_breaks.add(elem)
for selector, before in self.page_break_selectors:
body = item.data.xpath('//h:body', namespaces=NAMESPACES)
if not body:
continue
for elem in selector(body[0]):
if elem not in body:
if before:
elem.set('pb_before', '1')
page_breaks.add(elem)
for i, elem in enumerate(item.data.iter()):
elem.set('pb_order', str(i))
@ -136,8 +139,10 @@ class Split(object):
if href in self.map:
anchor_map = self.map[href]
nhref = anchor_map[frag if frag else None]
nhref = self.current_item.relhref(nhref)
if frag:
nhref = '#'.join(href, frag)
nhref = '#'.join((nhref, frag))
return nhref
return url
@ -153,7 +158,7 @@ class FlowSplitter(object):
self.page_breaks = page_breaks
self.page_break_ids = page_break_ids
self.max_flow_size = max_flow_size
self.base = item.abshref(item.href)
self.base = item.href
base, ext = os.path.splitext(self.base)
self.base = base.replace('%', '%%')+'_split_%d'+ext
@ -192,9 +197,9 @@ class FlowSplitter(object):
self.trees = []
tree = orig_tree
for pattern, before in ordered_ids:
self.log.debug('\t\tSplitting on page-break')
elem = pattern(tree)
if elem:
self.log.debug('\t\tSplitting on page-break')
before, after = self.do_split(tree, elem[0], before)
self.trees.append(before)
tree = after
@ -414,13 +419,14 @@ class FlowSplitter(object):
elem.attrib.pop(SPLIT_ATTR, None)
elem.attrib.pop(SPLIT_POINT_ATTR, '0')
spine_pos = self.item.spine_pos
for current, tree in zip(map(reversed, (self.files, self.trees))):
spine_pos = self.item.spine_position
for current, tree in zip(*map(reversed, (self.files, self.trees))):
for a in tree.getroot().xpath('//h:a[@href]', namespaces=NAMESPACES):
href = a.get('href').strip()
if href.startswith('#'):
anchor = href[1:]
file = self.anchor_map[anchor]
file = self.item.relhref(file)
if file != current:
a.set('href', file+href)
@ -430,12 +436,12 @@ class FlowSplitter(object):
self.oeb.spine.insert(spine_pos, new_item, self.item.linear)
if self.oeb.guide:
for ref in self.oeb.guide:
for ref in self.oeb.guide.values():
href, frag = urldefrag(ref.href)
if href == self.item.href:
nhref = self.anchor_map[frag if frag else None]
if frag:
nhref = '#'.join(nhref, frag)
nhref = '#'.join((nhref, frag))
ref.href = nhref
def fix_toc_entry(toc):
@ -444,7 +450,7 @@ class FlowSplitter(object):
if href == self.item.href:
nhref = self.anchor_map[frag if frag else None]
if frag:
nhref = '#'.join(nhref, frag)
nhref = '#'.join((nhref, frag))
toc.href = nhref
for x in toc:
fix_toc_entry(x)

View File

@ -49,7 +49,7 @@ class OEBWriter(object):
def __call__(self, oeb, path):
"""
Read the book in the :class:`OEBBook` object :param:`oeb` to a file
Write the book in the :class:`OEBBook` object :param:`oeb` to a folder
at :param:`path`.
"""
version = int(self.version[0])