mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Added LIT input plugin. Ported splitting code now works (at least on the handful of files I've tested)
This commit is contained in:
parent
b9f80aa229
commit
3e29dfbe56
@ -263,14 +263,14 @@ class MOBIMetadataWriter(MetadataWriterPlugin):
|
||||
def set_metadata(self, stream, mi, type):
    """Write the metadata ``mi`` into ``stream``.

    The work is delegated to ``calibre.ebooks.metadata.mobi.set_metadata``.

    :param stream: Stream handed unchanged to the MOBI metadata writer.
    :param mi: Metadata object handed unchanged to the MOBI metadata writer.
    :param type: Accepted as part of the plugin interface; not used here.
    """
    # Imported inside the method (as in the original) rather than at
    # module level.
    from calibre.ebooks.metadata.mobi import set_metadata
    set_metadata(stream, mi)
|
||||
|
||||
|
||||
class PDFMetadataWriter(MetadataWriterPlugin):
    """Metadata writer plugin registered for the ``pdf`` file type."""

    name        = 'Set PDF metadata'
    file_types  = set(['pdf'])
    description = _('Set metadata in %s files') % 'PDF'
    author      = 'John Schember'

    def set_metadata(self, stream, mi, type):
        """Write the metadata ``mi`` into ``stream``.

        The work is delegated to
        ``calibre.ebooks.metadata.pdf.set_metadata``.

        :param stream: Stream handed unchanged to the PDF metadata writer.
        :param mi: Metadata object handed unchanged to the PDF metadata
                   writer.
        :param type: Accepted as part of the plugin interface; not used
                     here.
        """
        # Imported inside the method (as in the original) rather than at
        # module level.
        from calibre.ebooks.metadata.pdf import set_metadata
        set_metadata(stream, mi)
|
||||
@ -280,6 +280,7 @@ from calibre.ebooks.epub.input import EPUBInput
|
||||
from calibre.ebooks.mobi.input import MOBIInput
|
||||
from calibre.ebooks.pdf.input import PDFInput
|
||||
from calibre.ebooks.txt.input import TXTInput
|
||||
from calibre.ebooks.lit.input import LITInput
|
||||
from calibre.ebooks.html.input import HTMLInput
|
||||
from calibre.ebooks.oeb.output import OEBOutput
|
||||
from calibre.ebooks.txt.output import TXTOutput
|
||||
@ -287,7 +288,7 @@ from calibre.ebooks.pdf.output import PDFOutput
|
||||
from calibre.customize.profiles import input_profiles, output_profiles
|
||||
|
||||
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDFInput, HTMLInput,
|
||||
TXTInput, OEBOutput, TXTOutput, PDFOutput]
|
||||
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput]
|
||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||
x.__name__.endswith('MetadataReader')]
|
||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||
|
@ -41,6 +41,11 @@ class ConversionOption(object):
|
||||
def __eq__(self, other):
    """Return True when ``self`` and ``other`` have the same hash.

    Equality is defined purely in terms of ``hash()``; no attribute is
    compared directly here.
    """
    return hash(self) == hash(other)
|
||||
|
||||
def clone(self):
    """Return a new ``ConversionOption`` built from this option's fields.

    The copy is shallow: ``name``, ``help``, ``long_switch``,
    ``short_switch`` and ``choices`` are passed to the constructor as the
    same objects this option holds.
    """
    return ConversionOption(name=self.name, help=self.help,
            long_switch=self.long_switch, short_switch=self.short_switch,
            choices=self.choices)
|
||||
|
||||
class OptionRecommendation(object):
|
||||
LOW = 1
|
||||
MED = 2
|
||||
@ -59,6 +64,10 @@ class OptionRecommendation(object):
|
||||
|
||||
self.validate_parameters()
|
||||
|
||||
def clone(self):
    """Return a new ``OptionRecommendation`` equivalent to this one.

    ``recommended_value`` and ``level`` are passed through unchanged; the
    wrapped option is duplicated via its own ``clone()`` so the copy does
    not share the option object with the original.
    """
    return OptionRecommendation(recommended_value=self.recommended_value,
            level=self.level, option=self.option.clone())
|
||||
|
||||
def validate_parameters(self):
|
||||
if self.option.choices and self.recommended_value not in \
|
||||
self.option.choices:
|
||||
@ -170,8 +179,14 @@ class InputFormatPlugin(Plugin):
|
||||
options.debug_input = os.path.abspath(options.debug_input)
|
||||
if not os.path.exists(options.debug_input):
|
||||
os.makedirs(options.debug_input)
|
||||
shutil.rmtree(options.debug_input)
|
||||
shutil.copytree(output_dir, options.debug_input)
|
||||
if isinstance(ret, basestring):
|
||||
shutil.rmtree(options.debug_input)
|
||||
shutil.copytree(output_dir, options.debug_input)
|
||||
else:
|
||||
from calibre.ebooks.oeb.writer import OEBWriter
|
||||
w = OEBWriter(pretty_print=options.pretty_print)
|
||||
w(ret, options.debug_input)
|
||||
|
||||
log.info('Input debug saved to:', options.debug_input)
|
||||
|
||||
return ret
|
||||
|
@ -57,7 +57,7 @@ def check_command_line_options(parser, args, log):
|
||||
raise SystemExit(1)
|
||||
|
||||
output = args[2]
|
||||
if output.startswith('.'):
|
||||
if output.startswith('.') and output != '.':
|
||||
output = os.path.splitext(os.path.basename(input))[0]+output
|
||||
output = os.path.abspath(output)
|
||||
|
||||
@ -171,7 +171,8 @@ def main(args=sys.argv):
|
||||
|
||||
plumber.run()
|
||||
|
||||
log(_('Output saved to'), ' ', plumber.output)
|
||||
if plumber.opts.debug_input is None:
|
||||
log(_('Output saved to'), ' ', plumber.output)
|
||||
|
||||
return 0
|
||||
|
||||
|
@ -32,8 +32,8 @@ class Plumber(object):
|
||||
:param input: Path to input file.
|
||||
:param output: Path to output file/directory
|
||||
'''
|
||||
self.input = input
|
||||
self.output = output
|
||||
self.input = os.path.abspath(input)
|
||||
self.output = os.path.abspath(output)
|
||||
self.log = log
|
||||
|
||||
# Initialize the conversion options that are independent of input and
|
||||
@ -188,15 +188,15 @@ OptionRecommendation(name='language',
|
||||
]
|
||||
|
||||
|
||||
input_fmt = os.path.splitext(input)[1]
|
||||
input_fmt = os.path.splitext(self.input)[1]
|
||||
if not input_fmt:
|
||||
raise ValueError('Input file must have an extension')
|
||||
input_fmt = input_fmt[1:].lower()
|
||||
|
||||
if os.path.exists(output) and os.path.isdir(output):
|
||||
if os.path.exists(self.output) and os.path.isdir(self.output):
|
||||
output_fmt = 'oeb'
|
||||
else:
|
||||
output_fmt = os.path.splitext(output)[1]
|
||||
output_fmt = os.path.splitext(self.output)[1]
|
||||
if not output_fmt:
|
||||
output_fmt = '.oeb'
|
||||
output_fmt = output_fmt[1:].lower()
|
||||
@ -323,6 +323,9 @@ OptionRecommendation(name='language',
|
||||
self.oeb = self.input_plugin(open(self.input, 'rb'), self.opts,
|
||||
self.input_fmt, self.log,
|
||||
accelerators, tdir)
|
||||
if self.opts.debug_input is not None:
|
||||
self.log('Debug input called, aborting the rest of the pipeline.')
|
||||
return
|
||||
if not hasattr(self.oeb, 'manifest'):
|
||||
self.oeb = create_oebbook(self.log, self.oeb, self.opts)
|
||||
|
||||
@ -365,18 +368,20 @@ OptionRecommendation(name='language',
|
||||
self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
|
||||
self.opts, self.log)
|
||||
|
||||
def create_oebbook(log, path_or_stream, opts, reader=None):
    '''
    Create an OEBBook.

    :param log: Log object; called directly to emit a progress message and
                handed to the new ``OEBBook``.
    :param path_or_stream: Input handed unchanged to the reader instance.
    :param opts: Options object; only ``opts.pretty_print`` is read here.
    :param reader: Reader class (or zero-argument factory). It is
                   instantiated and then called as
                   ``reader()(oeb, path_or_stream)``. Defaults to
                   ``OEBReader`` when ``None``.
    :return: The populated ``OEBBook``.
    '''
    from calibre.ebooks.oeb.base import OEBBook
    html_preprocessor = HTMLPreProcessor()
    oeb = OEBBook(log, html_preprocessor=html_preprocessor,
            pretty_print=opts.pretty_print)
    # Read OEB Book into OEBBook
    log('Parsing all content...')
    if reader is None:
        # Only import the default reader when the caller did not supply one.
        from calibre.ebooks.oeb.reader import OEBReader
        reader = OEBReader

    reader()(oeb, path_or_stream)
    return oeb
|
||||
|
||||
|
@ -252,6 +252,14 @@ class HTMLInput(InputFormatPlugin):
|
||||
)
|
||||
),
|
||||
|
||||
OptionRecommendation(name='dont_package',
|
||||
recommended_value=False, level=OptionRecommendation.LOW,
|
||||
help=_('Normally this input plugin re-arranges all the input '
|
||||
'files into a standard folder hierarchy. Only use this option '
|
||||
'if you know what you are doing as it can result in various '
|
||||
'nasty side effects in the rest of of the conversion pipeline.'
|
||||
)
|
||||
),
|
||||
])
|
||||
|
||||
def convert(self, stream, opts, file_ext, log,
|
||||
@ -276,6 +284,9 @@ class HTMLInput(InputFormatPlugin):
|
||||
mi.render(open('metadata.opf', 'wb'))
|
||||
opfpath = os.path.abspath('metadata.opf')
|
||||
|
||||
if opts.dont_package:
|
||||
return opfpath
|
||||
|
||||
from calibre.ebooks.conversion.plumber import create_oebbook
|
||||
oeb = create_oebbook(log, opfpath, opts)
|
||||
|
||||
|
24
src/calibre/ebooks/lit/input.py
Normal file
24
src/calibre/ebooks/lit/input.py
Normal file
@ -0,0 +1,24 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin
|
||||
|
||||
class LITInput(InputFormatPlugin):
    """Input plugin registered for the ``lit`` file type."""

    name        = 'LIT Input'
    author      = 'Marshall T. Vandegrift'
    description = 'Convert LIT files to HTML'
    file_types  = set(['lit'])

    def convert(self, stream, options, file_ext, log,
                accelerators):
        """Build and return an OEB book from the LIT ``stream``.

        The stream is handed to ``create_oebbook`` with ``LitReader`` as
        the reader class. ``file_ext`` and ``accelerators`` are accepted as
        part of the plugin interface but are not used here.
        """
        # Imported inside the method (as in the original) rather than at
        # module level.
        from calibre.ebooks.lit.reader import LitReader
        from calibre.ebooks.conversion.plumber import create_oebbook
        return create_oebbook(log, stream, options, reader=LitReader)
|
||||
|
||||
|
@ -7,13 +7,12 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> ' \
|
||||
'and Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
|
||||
import sys, struct, os
|
||||
import struct, os
|
||||
import functools
|
||||
import re
|
||||
from urlparse import urldefrag
|
||||
from cStringIO import StringIO
|
||||
from urllib import unquote as urlunquote
|
||||
from lxml import etree
|
||||
from calibre.ebooks.lit import LitError
|
||||
from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
|
||||
import calibre.ebooks.lit.mssha1 as mssha1
|
||||
@ -29,12 +28,12 @@ __all__ = ["LitReader"]
|
||||
XML_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
|
||||
"""
|
||||
OPF_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<!DOCTYPE package
|
||||
<!DOCTYPE package
|
||||
PUBLIC "+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Package//EN"
|
||||
"http://openebook.org/dtds/oeb-1.0.1/oebpkg101.dtd">
|
||||
"""
|
||||
HTML_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<!DOCTYPE html PUBLIC
|
||||
<!DOCTYPE html PUBLIC
|
||||
"+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Document//EN"
|
||||
"http://openebook.org/dtds/oeb-1.0.1/oebdoc101.dtd">
|
||||
"""
|
||||
@ -73,7 +72,7 @@ def encint(bytes, remaining):
|
||||
val <<= 7
|
||||
val |= (b & 0x7f)
|
||||
if b & 0x80 == 0: break
|
||||
return val, bytes[pos:], remaining
|
||||
return val, bytes[pos:], remaining
|
||||
|
||||
def msguid(bytes):
|
||||
values = struct.unpack("<LHHBBBBBBBB", bytes[:16])
|
||||
@ -123,7 +122,7 @@ class UnBinary(object):
|
||||
CLOSE_ANGLE_RE = re.compile(r'(?<!--)>>(?=>>|[^>])')
|
||||
DOUBLE_ANGLE_RE = re.compile(r'([<>])\1')
|
||||
EMPTY_ATOMS = ({},{})
|
||||
|
||||
|
||||
def __init__(self, bin, path, manifest={}, map=HTML_MAP, atoms=EMPTY_ATOMS):
|
||||
self.manifest = manifest
|
||||
self.tag_map, self.attr_map, self.tag_to_attr_map = map
|
||||
@ -143,7 +142,7 @@ class UnBinary(object):
|
||||
raw = self.CLOSE_ANGLE_RE.sub(r'>', raw)
|
||||
raw = self.DOUBLE_ANGLE_RE.sub(r'\1', raw)
|
||||
self.raw = raw
|
||||
|
||||
|
||||
def item_path(self, internal_id):
|
||||
try:
|
||||
target = self.manifest[internal_id].path
|
||||
@ -159,7 +158,7 @@ class UnBinary(object):
|
||||
index += 1
|
||||
relpath = (['..'] * (len(base) - index)) + target[index:]
|
||||
return '/'.join(relpath)
|
||||
|
||||
|
||||
def __unicode__(self):
    """Return ``self.raw`` decoded as UTF-8."""
    return self.raw.decode('utf-8')
|
||||
|
||||
@ -172,11 +171,11 @@ class UnBinary(object):
|
||||
in_censorship = is_goingdown = False
|
||||
state = 'text'
|
||||
flags = 0
|
||||
|
||||
|
||||
while index < len(bin):
|
||||
c, index = read_utf8_char(bin, index)
|
||||
oc = ord(c)
|
||||
|
||||
|
||||
if state == 'text':
|
||||
if oc == 0:
|
||||
state = 'get flags'
|
||||
@ -188,14 +187,14 @@ class UnBinary(object):
|
||||
elif c == '<':
|
||||
c = '<<'
|
||||
buf.write(encode(c))
|
||||
|
||||
|
||||
elif state == 'get flags':
|
||||
if oc == 0:
|
||||
state = 'text'
|
||||
continue
|
||||
flags = oc
|
||||
state = 'get tag'
|
||||
|
||||
|
||||
elif state == 'get tag':
|
||||
state = 'text' if oc == 0 else 'get attr'
|
||||
if flags & FLAG_OPENING:
|
||||
@ -226,7 +225,7 @@ class UnBinary(object):
|
||||
if depth == 0:
|
||||
raise LitError('Extra closing tag')
|
||||
return index
|
||||
|
||||
|
||||
elif state == 'get attr':
|
||||
in_censorship = False
|
||||
if oc == 0:
|
||||
@ -265,7 +264,7 @@ class UnBinary(object):
|
||||
state = 'get href length'
|
||||
else:
|
||||
state = 'get value length'
|
||||
|
||||
|
||||
elif state == 'get value length':
|
||||
if not in_censorship:
|
||||
buf.write('"')
|
||||
@ -281,7 +280,7 @@ class UnBinary(object):
|
||||
continue
|
||||
if count < 0 or count > (len(bin) - index):
|
||||
raise LitError('Invalid character count %d' % count)
|
||||
|
||||
|
||||
elif state == 'get value':
|
||||
if count == 0xfffe:
|
||||
if not in_censorship:
|
||||
@ -301,7 +300,7 @@ class UnBinary(object):
|
||||
buf.write('"')
|
||||
in_censorship = False
|
||||
state = 'get attr'
|
||||
|
||||
|
||||
elif state == 'get custom length':
|
||||
count = oc - 1
|
||||
if count <= 0 or count > len(bin)-index:
|
||||
@ -309,21 +308,21 @@ class UnBinary(object):
|
||||
dynamic_tag += 1
|
||||
state = 'get custom'
|
||||
tag_name = ''
|
||||
|
||||
|
||||
elif state == 'get custom':
|
||||
tag_name += c
|
||||
count -= 1
|
||||
if count == 0:
|
||||
buf.write(encode(tag_name))
|
||||
state = 'get attr'
|
||||
|
||||
|
||||
elif state == 'get attr length':
|
||||
count = oc - 1
|
||||
if count <= 0 or count > (len(bin) - index):
|
||||
raise LitError('Invalid character count %d' % count)
|
||||
buf.write(' ')
|
||||
state = 'get custom attr'
|
||||
|
||||
|
||||
elif state == 'get custom attr':
|
||||
buf.write(encode(c))
|
||||
count -= 1
|
||||
@ -337,7 +336,7 @@ class UnBinary(object):
|
||||
raise LitError('Invalid character count %d' % count)
|
||||
href = ''
|
||||
state = 'get href'
|
||||
|
||||
|
||||
elif state == 'get href':
|
||||
href += c
|
||||
count -= 1
|
||||
@ -350,7 +349,7 @@ class UnBinary(object):
|
||||
buf.write(encode(u'"%s"' % path))
|
||||
state = 'get attr'
|
||||
return index
|
||||
|
||||
|
||||
|
||||
class DirectoryEntry(object):
|
||||
def __init__(self, name, section, offset, size):
|
||||
@ -358,11 +357,11 @@ class DirectoryEntry(object):
|
||||
self.section = section
|
||||
self.offset = offset
|
||||
self.size = size
|
||||
|
||||
|
||||
def __repr__(self):
    """Return a constructor-style description of this directory entry.

    ``name`` is rendered with ``repr()``; ``section``, ``offset`` and
    ``size`` are formatted with ``%d``.
    """
    return "DirectoryEntry(name=%s, section=%d, offset=%d, size=%d)" \
        % (repr(self.name), self.section, self.offset, self.size)
|
||||
|
||||
|
||||
def __str__(self):
    """Return the same text as ``repr(self)``."""
    return repr(self)
|
||||
|
||||
@ -382,12 +381,12 @@ class ManifestItem(object):
|
||||
path = os.path.normpath(path).replace('\\', '/')
|
||||
while path.startswith('../'): path = path[3:]
|
||||
self.path = path
|
||||
|
||||
|
||||
def __eq__(self, other):
    """Compare this item's ``internal`` value with ``other``.

    When ``other`` has an ``internal`` attribute the two ``internal``
    values are compared; otherwise ``other`` itself is compared against
    ``self.internal``.
    """
    if hasattr(other, 'internal'):
        return self.internal == other.internal
    return self.internal == other
|
||||
|
||||
|
||||
def __repr__(self):
|
||||
return "ManifestItem(internal=%r, path=%r, mime_type=%r, " \
|
||||
"offset=%d, root=%r, state=%r)" \
|
||||
@ -404,7 +403,7 @@ def preserve(function):
|
||||
self.stream.seek(opos)
|
||||
functools.update_wrapper(wrapper, function)
|
||||
return wrapper
|
||||
|
||||
|
||||
class LitFile(object):
|
||||
PIECE_SIZE = 16
|
||||
|
||||
@ -438,14 +437,14 @@ class LitFile(object):
|
||||
return self.stream.read(8)
|
||||
return property(fget=fget)
|
||||
magic = magic()
|
||||
|
||||
|
||||
def version():
    # Property factory: builds the read-only ``version`` property and is
    # immediately replaced by it below.
    def fget(self):
        # Seek to byte offset 8 and decode the next 4 bytes with u32().
        # NOTE(review): unlike the sibling getters visible nearby, this
        # one is not wrapped in @preserve, so the stream position is left
        # after the read -- confirm this is intended.
        self.stream.seek(8)
        return u32(self.stream.read(4))
    return property(fget=fget)
version = version()
|
||||
|
||||
|
||||
def hdr_len():
|
||||
@preserve
|
||||
def fget(self):
|
||||
@ -453,7 +452,7 @@ class LitFile(object):
|
||||
return int32(self.stream.read(4))
|
||||
return property(fget=fget)
|
||||
hdr_len = hdr_len()
|
||||
|
||||
|
||||
def num_pieces():
|
||||
@preserve
|
||||
def fget(self):
|
||||
@ -461,7 +460,7 @@ class LitFile(object):
|
||||
return int32(self.stream.read(4))
|
||||
return property(fget=fget)
|
||||
num_pieces = num_pieces()
|
||||
|
||||
|
||||
def sec_hdr_len():
|
||||
@preserve
|
||||
def fget(self):
|
||||
@ -469,7 +468,7 @@ class LitFile(object):
|
||||
return int32(self.stream.read(4))
|
||||
return property(fget=fget)
|
||||
sec_hdr_len = sec_hdr_len()
|
||||
|
||||
|
||||
def guid():
|
||||
@preserve
|
||||
def fget(self):
|
||||
@ -477,7 +476,7 @@ class LitFile(object):
|
||||
return self.stream.read(16)
|
||||
return property(fget=fget)
|
||||
guid = guid()
|
||||
|
||||
|
||||
def header():
|
||||
@preserve
|
||||
def fget(self):
|
||||
@ -488,7 +487,7 @@ class LitFile(object):
|
||||
return self.stream.read(size)
|
||||
return property(fget=fget)
|
||||
header = header()
|
||||
|
||||
|
||||
@preserve
|
||||
def __len__(self):
|
||||
self.stream.seek(0, 2)
|
||||
@ -501,7 +500,7 @@ class LitFile(object):
|
||||
|
||||
def read_content(self, offset, size):
    """Read ``size`` bytes at ``offset`` relative to ``content_offset``.

    Delegates to ``read_raw`` with ``self.content_offset + offset`` as the
    start position and returns whatever ``read_raw`` returns.
    """
    return self.read_raw(self.content_offset + offset, size)
|
||||
|
||||
|
||||
def read_secondary_header(self):
|
||||
offset = self.hdr_len + (self.num_pieces * self.PIECE_SIZE)
|
||||
bytes = self.read_raw(offset, self.sec_hdr_len)
|
||||
@ -526,12 +525,12 @@ class LitFile(object):
|
||||
if u32(bytes[offset+4+16:]):
|
||||
raise LitError('This file has a 64bit content offset')
|
||||
self.content_offset = u32(bytes[offset+16:])
|
||||
self.timestamp = u32(bytes[offset+24:])
|
||||
self.timestamp = u32(bytes[offset+24:])
|
||||
self.language_id = u32(bytes[offset+28:])
|
||||
offset += 48
|
||||
if not hasattr(self, 'content_offset'):
|
||||
raise LitError('Could not figure out the content offset')
|
||||
|
||||
|
||||
def read_header_pieces(self):
|
||||
src = self.header[self.hdr_len:]
|
||||
for i in xrange(self.num_pieces):
|
||||
@ -556,7 +555,7 @@ class LitFile(object):
|
||||
self.piece3_guid = piece
|
||||
elif i == 4:
|
||||
self.piece4_guid = piece
|
||||
|
||||
|
||||
def read_directory(self, piece):
|
||||
if not piece.startswith('IFCM'):
|
||||
raise LitError('Header piece #1 is not main directory.')
|
||||
@ -760,9 +759,9 @@ class LitFile(object):
|
||||
raise LitError("Reset table is too short")
|
||||
if u32(reset_table[RESET_UCLENGTH + 4:]) != 0:
|
||||
raise LitError("Reset table has 64bit value for UCLENGTH")
|
||||
|
||||
|
||||
result = []
|
||||
|
||||
|
||||
window_size = 14
|
||||
u = u32(control[CONTROL_WINDOW_SIZE:])
|
||||
while u > 0:
|
||||
@ -847,13 +846,13 @@ class LitContainer(object):
|
||||
|
||||
def __init__(self, filename_or_stream):
    """Wrap ``filename_or_stream`` in a ``LitFile`` kept as ``_litfile``."""
    self._litfile = LitFile(filename_or_stream)
|
||||
|
||||
|
||||
def namelist(self):
    """Return the keys of the underlying LIT file's ``paths`` mapping."""
    return self._litfile.paths.keys()
|
||||
|
||||
def exists(self, name):
    """Return True if ``name`` is a key of the LIT file's ``paths``.

    ``name`` is URL-unquoted before the lookup, so percent-escaped names
    match their unescaped entries.
    """
    return urlunquote(name) in self._litfile.paths
|
||||
|
||||
|
||||
def read(self, name):
|
||||
entry = self._litfile.paths[urlunquote(name)] if name else None
|
||||
if entry is None:
|
||||
@ -869,7 +868,7 @@ class LitContainer(object):
|
||||
internal = '/'.join(('/data', entry.internal))
|
||||
content = self._litfile.get_file(internal)
|
||||
return content
|
||||
|
||||
|
||||
def _read_meta(self):
|
||||
path = 'content.opf'
|
||||
raw = self._litfile.get_file('/meta')
|
||||
|
@ -272,11 +272,7 @@ def XPath(expr):
|
||||
def xpath(elem, expr):
    """Evaluate ``expr`` on ``elem`` with the module's ``XPNSMAP`` prefixes.

    Returns whatever ``elem.xpath`` returns.
    """
    return elem.xpath(expr, namespaces=XPNSMAP)
|
||||
|
||||
def _prepare_xml_for_serialization(root):
|
||||
pass
|
||||
|
||||
def xml2str(root, pretty_print=False, strip_comments=False):
|
||||
_prepare_xml_for_serialization(root)
|
||||
ans = etree.tostring(root, encoding='utf-8', xml_declaration=True,
|
||||
pretty_print=pretty_print)
|
||||
|
||||
@ -287,7 +283,6 @@ def xml2str(root, pretty_print=False, strip_comments=False):
|
||||
|
||||
|
||||
def xml2unicode(root, pretty_print=False):
|
||||
_prepare_xml_for_serialization(root)
|
||||
return etree.tostring(root, pretty_print=pretty_print)
|
||||
|
||||
ASCII_CHARS = set(chr(x) for x in xrange(128))
|
||||
@ -321,6 +316,25 @@ def urlnormalize(href):
|
||||
parts = (urlquote(part) for part in parts)
|
||||
return urlunparse(parts)
|
||||
|
||||
class DummyHandler(logging.Handler):
    """Logging handler that forwards records to an attachable log object.

    The handler is created with level ``logging.WARNING`` and a
    message-only formatter. Its ``log`` attribute starts as ``None`` and
    is assigned from outside the class; while it is ``None`` every record
    is silently dropped.
    """

    def __init__(self):
        logging.Handler.__init__(self, logging.WARNING)
        self.setFormatter(logging.Formatter('%(message)s'))
        # Target log object; must provide ``error`` and ``warn`` methods.
        self.log = None

    def emit(self, record):
        """Forward ``record`` to ``self.log``, if one is attached.

        Records at ``logging.ERROR`` or above go to ``log.error``; all
        other records go to ``log.warn``.
        """
        if self.log is not None:
            msg = self.format(record)
            f = self.log.error if record.levelno >= logging.ERROR \
                    else self.log.warn
            f(msg)


# Dedicated logger for CSS parsing messages, routed through a single
# module-level DummyHandler instance.
_css_logger = logging.getLogger('calibre.css')
_css_logger.setLevel(logging.WARNING)
_css_log_handler = DummyHandler()
_css_logger.addHandler(_css_log_handler)
|
||||
|
||||
class OEBError(Exception):
|
||||
"""Generic OEB-processing error."""
|
||||
@ -778,7 +792,8 @@ class Manifest(object):
|
||||
data = self.oeb.css_preprocessor(data)
|
||||
data = XHTML_CSS_NAMESPACE + data
|
||||
parser = CSSParser(loglevel=logging.WARNING,
|
||||
fetcher=self._fetch_css)
|
||||
fetcher=self._fetch_css,
|
||||
log=_css_logger)
|
||||
data = parser.parseString(data, href=self.href)
|
||||
data.namespaces['h'] = XHTML_NS
|
||||
return data
|
||||
@ -1435,7 +1450,7 @@ class OEBBook(object):
|
||||
:attr:`pages`: List of "pages," such as indexed to a print edition of
|
||||
the same text.
|
||||
"""
|
||||
|
||||
_css_log_handler.log = logger
|
||||
self.encoding = encoding
|
||||
self.html_preprocessor = html_preprocessor
|
||||
self.css_preprocessor = css_preprocessor
|
||||
@ -1450,6 +1465,7 @@ class OEBBook(object):
|
||||
self.guide = Guide(self)
|
||||
self.toc = TOC()
|
||||
self.pages = PageList()
|
||||
self.auto_generated_toc = True
|
||||
|
||||
@classmethod
|
||||
def generate(cls, opts):
|
||||
|
@ -13,13 +13,12 @@ from PyQt4.Qt import QFontDatabase
|
||||
|
||||
from calibre.customize.ui import available_input_formats
|
||||
from calibre.ebooks.epub.from_html import TITLEPAGE
|
||||
from calibre.ebooks.metadata.opf2 import OPF, OPFCreator
|
||||
from calibre.ebooks.metadata.opf2 import OPF
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from calibre.utils.zipfile import safe_replace, ZipFile
|
||||
from calibre.utils.config import DynamicConfig
|
||||
from calibre.utils.logging import Log
|
||||
from calibre import CurrentDir
|
||||
|
||||
def character_count(html):
|
||||
'''
|
||||
@ -57,31 +56,21 @@ class FakeOpts(object):
|
||||
max_levels = 5
|
||||
input_encoding = None
|
||||
|
||||
def html2opf(path, tdir, log):
    """Generate ``metadata.opf`` for the HTML file ``path`` inside ``tdir``.

    The file list returned by ``get_filelist`` becomes both the manifest
    (every entry typed ``application/xhtml+xml``) and the spine.

    :param path: Path to the root HTML file.
    :param tdir: Directory that is made current while the OPF is written.
    :param log: Log object handed to ``get_filelist``.
    :return: Absolute path of the written ``metadata.opf``.
    """
    from calibre.ebooks.html.input import get_filelist
    from calibre.ebooks.metadata.meta import get_metadata
    with CurrentDir(tdir):
        fl = get_filelist(path, tdir, FakeOpts(), log)
        # Close the input file deterministically instead of leaking the
        # handle until garbage collection.
        with open(path, 'rb') as src:
            mi = get_metadata(src, 'html')
        mi = OPFCreator(os.getcwdu(), mi)
        mi.guide = None
        entries = [(f.path, 'application/xhtml+xml') for f in fl]
        mi.create_manifest(entries)
        mi.create_spine([f.path for f in fl])

        # Closing the output file guarantees the OPF is flushed to disk
        # before its path is returned to the caller.
        with open('metadata.opf', 'wb') as out:
            mi.render(out)
        # Resolved while tdir is still the current directory.
        opfpath = os.path.abspath('metadata.opf')

    return opfpath
|
||||
|
||||
def opf2opf(path, tdir, opts):
    """Return ``path`` unchanged; ``tdir`` and ``opts`` are not used."""
    return path
|
||||
|
||||
def is_supported(path):
    """Return True if the extension of ``path`` is an available input format.

    The extension is lower-cased and stripped of dots, and any
    ``htm``/``html``/``xhtm``/``xhtml`` substring in it is rewritten to
    ``html`` before the lookup in ``available_input_formats()``.
    """
    ext = os.path.splitext(path)[1].replace('.', '').lower()
    ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
    return ext in available_input_formats()
|
||||
|
||||
|
||||
def write_oebbook(oeb, path):
    """Write ``oeb`` under ``path`` and return the path of its OPF file.

    The book is written with a default ``OEBWriter``; ``path`` is then
    walked and the first file whose name ends with ``.opf`` is returned.

    :return: Path of the first ``.opf`` file found, or ``None`` if the
             walk finds none.
    """
    from calibre.ebooks.oeb.writer import OEBWriter
    from calibre import walk
    w = OEBWriter()
    w(oeb, path)
    for f in walk(path):
        if f.endswith('.opf'):
            return f
    # Made explicit: callers get None when no OPF was produced.
    return None
|
||||
|
||||
class EbookIterator(object):
|
||||
|
||||
CHARACTERS_PER_PAGE = 1000
|
||||
@ -131,17 +120,16 @@ class EbookIterator(object):
|
||||
def __enter__(self):
|
||||
self._tdir = TemporaryDirectory('_ebook_iter')
|
||||
self.base = self._tdir.__enter__()
|
||||
if self.ebook_ext == 'opf':
|
||||
self.pathtoopf = self.pathtoebook
|
||||
elif self.ebook_ext == 'html':
|
||||
self.pathtoopf = html2opf(self.pathtoebook, self.base, self.log)
|
||||
else:
|
||||
from calibre.ebooks.conversion.plumber import Plumber
|
||||
plumber = Plumber(self.pathtoebook, self.base, self.log)
|
||||
plumber.setup_options()
|
||||
self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
|
||||
plumber.opts, plumber.input_fmt, self.log,
|
||||
{}, self.base)
|
||||
from calibre.ebooks.conversion.plumber import Plumber
|
||||
plumber = Plumber(self.pathtoebook, self.base, self.log)
|
||||
plumber.setup_options()
|
||||
if hasattr(plumber.opts, 'dont_package'):
|
||||
plumber.opts.dont_package = True
|
||||
self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
|
||||
plumber.opts, plumber.input_fmt, self.log,
|
||||
{}, self.base)
|
||||
if hasattr(self.pathtoopf, 'manifest'):
|
||||
self.pathtoopf = write_oebbook(self.pathtoebook, self._tdir)
|
||||
|
||||
|
||||
self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
|
||||
|
@ -16,7 +16,6 @@ class OEBOutput(OutputFormatPlugin):
|
||||
author = 'Kovid Goyal'
|
||||
file_type = 'oeb'
|
||||
|
||||
|
||||
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
||||
self.log, self.opts = log, opts
|
||||
if not os.path.exists(output_path):
|
||||
|
@ -349,6 +349,7 @@ class OEBReader(object):
|
||||
def _toc_from_ncx(self, item):
|
||||
if item is None:
|
||||
return False
|
||||
self.log.debug('Reading TOC from NCX...')
|
||||
ncx = item.data
|
||||
title = ''.join(xpath(ncx, 'ncx:docTitle/ncx:text/text()'))
|
||||
title = COLLAPSE_RE.sub(' ', title.strip())
|
||||
@ -364,6 +365,7 @@ class OEBReader(object):
|
||||
result = xpath(opf, 'o2:tours/o2:tour')
|
||||
if not result:
|
||||
return False
|
||||
self.log.debug('Reading TOC from tour...')
|
||||
tour = result[0]
|
||||
toc = self.oeb.toc
|
||||
toc.title = tour.get('title')
|
||||
@ -384,6 +386,7 @@ class OEBReader(object):
|
||||
def _toc_from_html(self, opf):
|
||||
if 'toc' not in self.oeb.guide:
|
||||
return False
|
||||
self.log.debug('Reading TOC from HTML...')
|
||||
itempath, frag = urldefrag(self.oeb.guide['toc'].href)
|
||||
item = self.oeb.manifest.hrefs[itempath]
|
||||
html = item.data
|
||||
@ -414,6 +417,7 @@ class OEBReader(object):
|
||||
return True
|
||||
|
||||
def _toc_from_spine(self, opf):
|
||||
self.log.warn('Generating default TOC from spine...')
|
||||
toc = self.oeb.toc
|
||||
titles = []
|
||||
headers = []
|
||||
@ -441,11 +445,14 @@ class OEBReader(object):
|
||||
return True
|
||||
|
||||
def _toc_from_opf(self, opf, item):
    """Populate the table of contents, trying each source in turn.

    Sources are tried in the order NCX, HTML, tour; the first one that
    reports success wins. If none does, a TOC is generated from the spine
    and ``oeb.auto_generated_toc`` is set to True; in every other case it
    is left False.
    """
    self.oeb.auto_generated_toc = False
    if self._toc_from_ncx(item): return
    # Prefer HTML to tour based TOC, since several LIT files
    # have good HTML TOCs but bad tour based TOCs
    if self._toc_from_html(opf): return
    if self._toc_from_tour(opf): return
    self._toc_from_spine(opf)
    self.oeb.auto_generated_toc = True
|
||||
|
||||
def _pages_from_ncx(self, opf, item):
|
||||
if item is None:
|
||||
|
@ -51,8 +51,8 @@ class Split(object):
|
||||
self.log = oeb.log
|
||||
self.map = {}
|
||||
self.page_break_selectors = None
|
||||
for item in self.oeb.manifest.items:
|
||||
if etree.iselement(item.data):
|
||||
for item in list(self.oeb.manifest.items):
|
||||
if item.spine_position is not None and etree.iselement(item.data):
|
||||
self.split_item(item)
|
||||
|
||||
self.fix_links()
|
||||
@ -74,31 +74,34 @@ class Split(object):
|
||||
self.page_break_selectors = set([])
|
||||
stylesheets = [x.data for x in self.oeb.manifest if x.media_type in
|
||||
OEB_STYLES]
|
||||
page_break_selectors = set([])
|
||||
for rule in rules(stylesheets):
|
||||
before = getattr(rule.style.getPropertyCSSValue(
|
||||
'page-break-before'), 'cssText', '').strip().lower()
|
||||
after = getattr(rule.style.getPropertyCSSValue(
|
||||
'page-break-after'), 'cssText', '').strip().lower()
|
||||
try:
|
||||
if before and before != 'avoid':
|
||||
page_break_selectors.add((CSSSelector(rule.selectorText),
|
||||
True))
|
||||
except:
|
||||
pass
|
||||
try:
|
||||
if after and after != 'avoid':
|
||||
page_break_selectors.add((CSSSelector(rule.selectorText),
|
||||
False))
|
||||
except:
|
||||
pass
|
||||
for rule in rules(stylesheets):
|
||||
before = getattr(rule.style.getPropertyCSSValue(
|
||||
'page-break-before'), 'cssText', '').strip().lower()
|
||||
after = getattr(rule.style.getPropertyCSSValue(
|
||||
'page-break-after'), 'cssText', '').strip().lower()
|
||||
try:
|
||||
if before and before != 'avoid':
|
||||
self.page_break_selectors.add((CSSSelector(rule.selectorText),
|
||||
True))
|
||||
except:
|
||||
pass
|
||||
try:
|
||||
if after and after != 'avoid':
|
||||
self.page_break_selectors.add((CSSSelector(rule.selectorText),
|
||||
False))
|
||||
except:
|
||||
pass
|
||||
|
||||
page_breaks = set([])
|
||||
for selector, before in page_break_selectors:
|
||||
for elem in selector(item.data):
|
||||
if before:
|
||||
elem.set('pb_before', '1')
|
||||
page_breaks.add(elem)
|
||||
for selector, before in self.page_break_selectors:
|
||||
body = item.data.xpath('//h:body', namespaces=NAMESPACES)
|
||||
if not body:
|
||||
continue
|
||||
for elem in selector(body[0]):
|
||||
if elem not in body:
|
||||
if before:
|
||||
elem.set('pb_before', '1')
|
||||
page_breaks.add(elem)
|
||||
|
||||
for i, elem in enumerate(item.data.iter()):
|
||||
elem.set('pb_order', str(i))
|
||||
@ -136,8 +139,10 @@ class Split(object):
|
||||
if href in self.map:
|
||||
anchor_map = self.map[href]
|
||||
nhref = anchor_map[frag if frag else None]
|
||||
nhref = self.current_item.relhref(nhref)
|
||||
if frag:
|
||||
nhref = '#'.join(href, frag)
|
||||
nhref = '#'.join((nhref, frag))
|
||||
|
||||
return nhref
|
||||
return url
|
||||
|
||||
@ -153,7 +158,7 @@ class FlowSplitter(object):
|
||||
self.page_breaks = page_breaks
|
||||
self.page_break_ids = page_break_ids
|
||||
self.max_flow_size = max_flow_size
|
||||
self.base = item.abshref(item.href)
|
||||
self.base = item.href
|
||||
|
||||
base, ext = os.path.splitext(self.base)
|
||||
self.base = base.replace('%', '%%')+'_split_%d'+ext
|
||||
@ -192,9 +197,9 @@ class FlowSplitter(object):
|
||||
self.trees = []
|
||||
tree = orig_tree
|
||||
for pattern, before in ordered_ids:
|
||||
self.log.debug('\t\tSplitting on page-break')
|
||||
elem = pattern(tree)
|
||||
if elem:
|
||||
self.log.debug('\t\tSplitting on page-break')
|
||||
before, after = self.do_split(tree, elem[0], before)
|
||||
self.trees.append(before)
|
||||
tree = after
|
||||
@ -414,13 +419,14 @@ class FlowSplitter(object):
|
||||
elem.attrib.pop(SPLIT_ATTR, None)
|
||||
elem.attrib.pop(SPLIT_POINT_ATTR, '0')
|
||||
|
||||
spine_pos = self.item.spine_pos
|
||||
for current, tree in zip(map(reversed, (self.files, self.trees))):
|
||||
spine_pos = self.item.spine_position
|
||||
for current, tree in zip(*map(reversed, (self.files, self.trees))):
|
||||
for a in tree.getroot().xpath('//h:a[@href]', namespaces=NAMESPACES):
|
||||
href = a.get('href').strip()
|
||||
if href.startswith('#'):
|
||||
anchor = href[1:]
|
||||
file = self.anchor_map[anchor]
|
||||
file = self.item.relhref(file)
|
||||
if file != current:
|
||||
a.set('href', file+href)
|
||||
|
||||
@ -430,12 +436,12 @@ class FlowSplitter(object):
|
||||
self.oeb.spine.insert(spine_pos, new_item, self.item.linear)
|
||||
|
||||
if self.oeb.guide:
|
||||
for ref in self.oeb.guide:
|
||||
for ref in self.oeb.guide.values():
|
||||
href, frag = urldefrag(ref.href)
|
||||
if href == self.item.href:
|
||||
nhref = self.anchor_map[frag if frag else None]
|
||||
if frag:
|
||||
nhref = '#'.join(nhref, frag)
|
||||
nhref = '#'.join((nhref, frag))
|
||||
ref.href = nhref
|
||||
|
||||
def fix_toc_entry(toc):
|
||||
@ -444,7 +450,7 @@ class FlowSplitter(object):
|
||||
if href == self.item.href:
|
||||
nhref = self.anchor_map[frag if frag else None]
|
||||
if frag:
|
||||
nhref = '#'.join(nhref, frag)
|
||||
nhref = '#'.join((nhref, frag))
|
||||
toc.href = nhref
|
||||
for x in toc:
|
||||
fix_toc_entry(x)
|
||||
|
@ -49,7 +49,7 @@ class OEBWriter(object):
|
||||
|
||||
def __call__(self, oeb, path):
|
||||
"""
|
||||
Read the book in the :class:`OEBBook` object :param:`oeb` to a file
|
||||
Write the book in the :class:`OEBBook` object :param:`oeb` to a folder
|
||||
at :param:`path`.
|
||||
"""
|
||||
version = int(self.version[0])
|
||||
|
Loading…
x
Reference in New Issue
Block a user