mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Added LIT input plugin. Ported splitting code now works (at least on the handful of files I've tested)
This commit is contained in:
parent
b9f80aa229
commit
3e29dfbe56
@ -263,14 +263,14 @@ class MOBIMetadataWriter(MetadataWriterPlugin):
|
|||||||
def set_metadata(self, stream, mi, type):
|
def set_metadata(self, stream, mi, type):
|
||||||
from calibre.ebooks.metadata.mobi import set_metadata
|
from calibre.ebooks.metadata.mobi import set_metadata
|
||||||
set_metadata(stream, mi)
|
set_metadata(stream, mi)
|
||||||
|
|
||||||
class PDFMetadataWriter(MetadataWriterPlugin):
|
class PDFMetadataWriter(MetadataWriterPlugin):
|
||||||
|
|
||||||
name = 'Set PDF metadata'
|
name = 'Set PDF metadata'
|
||||||
file_types = set(['pdf'])
|
file_types = set(['pdf'])
|
||||||
description = _('Set metadata in %s files') % 'PDF'
|
description = _('Set metadata in %s files') % 'PDF'
|
||||||
author = 'John Schember'
|
author = 'John Schember'
|
||||||
|
|
||||||
def set_metadata(self, stream, mi, type):
|
def set_metadata(self, stream, mi, type):
|
||||||
from calibre.ebooks.metadata.pdf import set_metadata
|
from calibre.ebooks.metadata.pdf import set_metadata
|
||||||
set_metadata(stream, mi)
|
set_metadata(stream, mi)
|
||||||
@ -280,6 +280,7 @@ from calibre.ebooks.epub.input import EPUBInput
|
|||||||
from calibre.ebooks.mobi.input import MOBIInput
|
from calibre.ebooks.mobi.input import MOBIInput
|
||||||
from calibre.ebooks.pdf.input import PDFInput
|
from calibre.ebooks.pdf.input import PDFInput
|
||||||
from calibre.ebooks.txt.input import TXTInput
|
from calibre.ebooks.txt.input import TXTInput
|
||||||
|
from calibre.ebooks.lit.input import LITInput
|
||||||
from calibre.ebooks.html.input import HTMLInput
|
from calibre.ebooks.html.input import HTMLInput
|
||||||
from calibre.ebooks.oeb.output import OEBOutput
|
from calibre.ebooks.oeb.output import OEBOutput
|
||||||
from calibre.ebooks.txt.output import TXTOutput
|
from calibre.ebooks.txt.output import TXTOutput
|
||||||
@ -287,7 +288,7 @@ from calibre.ebooks.pdf.output import PDFOutput
|
|||||||
from calibre.customize.profiles import input_profiles, output_profiles
|
from calibre.customize.profiles import input_profiles, output_profiles
|
||||||
|
|
||||||
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDFInput, HTMLInput,
|
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDFInput, HTMLInput,
|
||||||
TXTInput, OEBOutput, TXTOutput, PDFOutput]
|
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput]
|
||||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||||
x.__name__.endswith('MetadataReader')]
|
x.__name__.endswith('MetadataReader')]
|
||||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||||
|
@ -41,6 +41,11 @@ class ConversionOption(object):
|
|||||||
def __eq__(self, other):
|
def __eq__(self, other):
|
||||||
return hash(self) == hash(other)
|
return hash(self) == hash(other)
|
||||||
|
|
||||||
|
def clone(self):
|
||||||
|
return ConversionOption(name=self.name, help=self.help,
|
||||||
|
long_switch=self.long_switch, short_switch=self.short_switch,
|
||||||
|
choices=self.choices)
|
||||||
|
|
||||||
class OptionRecommendation(object):
|
class OptionRecommendation(object):
|
||||||
LOW = 1
|
LOW = 1
|
||||||
MED = 2
|
MED = 2
|
||||||
@ -59,6 +64,10 @@ class OptionRecommendation(object):
|
|||||||
|
|
||||||
self.validate_parameters()
|
self.validate_parameters()
|
||||||
|
|
||||||
|
def clone(self):
|
||||||
|
return OptionRecommendation(recommended_value=self.recommended_value,
|
||||||
|
level=self.level, option=self.option.clone())
|
||||||
|
|
||||||
def validate_parameters(self):
|
def validate_parameters(self):
|
||||||
if self.option.choices and self.recommended_value not in \
|
if self.option.choices and self.recommended_value not in \
|
||||||
self.option.choices:
|
self.option.choices:
|
||||||
@ -170,8 +179,14 @@ class InputFormatPlugin(Plugin):
|
|||||||
options.debug_input = os.path.abspath(options.debug_input)
|
options.debug_input = os.path.abspath(options.debug_input)
|
||||||
if not os.path.exists(options.debug_input):
|
if not os.path.exists(options.debug_input):
|
||||||
os.makedirs(options.debug_input)
|
os.makedirs(options.debug_input)
|
||||||
shutil.rmtree(options.debug_input)
|
if isinstance(ret, basestring):
|
||||||
shutil.copytree(output_dir, options.debug_input)
|
shutil.rmtree(options.debug_input)
|
||||||
|
shutil.copytree(output_dir, options.debug_input)
|
||||||
|
else:
|
||||||
|
from calibre.ebooks.oeb.writer import OEBWriter
|
||||||
|
w = OEBWriter(pretty_print=options.pretty_print)
|
||||||
|
w(ret, options.debug_input)
|
||||||
|
|
||||||
log.info('Input debug saved to:', options.debug_input)
|
log.info('Input debug saved to:', options.debug_input)
|
||||||
|
|
||||||
return ret
|
return ret
|
||||||
|
@ -57,7 +57,7 @@ def check_command_line_options(parser, args, log):
|
|||||||
raise SystemExit(1)
|
raise SystemExit(1)
|
||||||
|
|
||||||
output = args[2]
|
output = args[2]
|
||||||
if output.startswith('.'):
|
if output.startswith('.') and output != '.':
|
||||||
output = os.path.splitext(os.path.basename(input))[0]+output
|
output = os.path.splitext(os.path.basename(input))[0]+output
|
||||||
output = os.path.abspath(output)
|
output = os.path.abspath(output)
|
||||||
|
|
||||||
@ -171,7 +171,8 @@ def main(args=sys.argv):
|
|||||||
|
|
||||||
plumber.run()
|
plumber.run()
|
||||||
|
|
||||||
log(_('Output saved to'), ' ', plumber.output)
|
if plumber.opts.debug_input is None:
|
||||||
|
log(_('Output saved to'), ' ', plumber.output)
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
@ -32,8 +32,8 @@ class Plumber(object):
|
|||||||
:param input: Path to input file.
|
:param input: Path to input file.
|
||||||
:param output: Path to output file/directory
|
:param output: Path to output file/directory
|
||||||
'''
|
'''
|
||||||
self.input = input
|
self.input = os.path.abspath(input)
|
||||||
self.output = output
|
self.output = os.path.abspath(output)
|
||||||
self.log = log
|
self.log = log
|
||||||
|
|
||||||
# Initialize the conversion options that are independent of input and
|
# Initialize the conversion options that are independent of input and
|
||||||
@ -188,15 +188,15 @@ OptionRecommendation(name='language',
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
input_fmt = os.path.splitext(input)[1]
|
input_fmt = os.path.splitext(self.input)[1]
|
||||||
if not input_fmt:
|
if not input_fmt:
|
||||||
raise ValueError('Input file must have an extension')
|
raise ValueError('Input file must have an extension')
|
||||||
input_fmt = input_fmt[1:].lower()
|
input_fmt = input_fmt[1:].lower()
|
||||||
|
|
||||||
if os.path.exists(output) and os.path.isdir(output):
|
if os.path.exists(self.output) and os.path.isdir(self.output):
|
||||||
output_fmt = 'oeb'
|
output_fmt = 'oeb'
|
||||||
else:
|
else:
|
||||||
output_fmt = os.path.splitext(output)[1]
|
output_fmt = os.path.splitext(self.output)[1]
|
||||||
if not output_fmt:
|
if not output_fmt:
|
||||||
output_fmt = '.oeb'
|
output_fmt = '.oeb'
|
||||||
output_fmt = output_fmt[1:].lower()
|
output_fmt = output_fmt[1:].lower()
|
||||||
@ -323,6 +323,9 @@ OptionRecommendation(name='language',
|
|||||||
self.oeb = self.input_plugin(open(self.input, 'rb'), self.opts,
|
self.oeb = self.input_plugin(open(self.input, 'rb'), self.opts,
|
||||||
self.input_fmt, self.log,
|
self.input_fmt, self.log,
|
||||||
accelerators, tdir)
|
accelerators, tdir)
|
||||||
|
if self.opts.debug_input is not None:
|
||||||
|
self.log('Debug input called, aborting the rest of the pipeline.')
|
||||||
|
return
|
||||||
if not hasattr(self.oeb, 'manifest'):
|
if not hasattr(self.oeb, 'manifest'):
|
||||||
self.oeb = create_oebbook(self.log, self.oeb, self.opts)
|
self.oeb = create_oebbook(self.log, self.oeb, self.opts)
|
||||||
|
|
||||||
@ -365,18 +368,20 @@ OptionRecommendation(name='language',
|
|||||||
self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
|
self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
|
||||||
self.opts, self.log)
|
self.opts, self.log)
|
||||||
|
|
||||||
def create_oebbook(log, opfpath, opts):
|
def create_oebbook(log, path_or_stream, opts, reader=None):
|
||||||
'''
|
'''
|
||||||
Create an OEBBook from an OPF file.
|
Create an OEBBook.
|
||||||
'''
|
'''
|
||||||
from calibre.ebooks.oeb.reader import OEBReader
|
|
||||||
from calibre.ebooks.oeb.base import OEBBook
|
from calibre.ebooks.oeb.base import OEBBook
|
||||||
html_preprocessor = HTMLPreProcessor()
|
html_preprocessor = HTMLPreProcessor()
|
||||||
reader = OEBReader()
|
|
||||||
oeb = OEBBook(log, html_preprocessor=html_preprocessor,
|
oeb = OEBBook(log, html_preprocessor=html_preprocessor,
|
||||||
pretty_print=opts.pretty_print)
|
pretty_print=opts.pretty_print)
|
||||||
# Read OEB Book into OEBBook
|
# Read OEB Book into OEBBook
|
||||||
log.info('Parsing all content...')
|
log('Parsing all content...')
|
||||||
reader(oeb, opfpath)
|
if reader is None:
|
||||||
|
from calibre.ebooks.oeb.reader import OEBReader
|
||||||
|
reader = OEBReader
|
||||||
|
|
||||||
|
reader()(oeb, path_or_stream)
|
||||||
return oeb
|
return oeb
|
||||||
|
|
||||||
|
@ -252,6 +252,14 @@ class HTMLInput(InputFormatPlugin):
|
|||||||
)
|
)
|
||||||
),
|
),
|
||||||
|
|
||||||
|
OptionRecommendation(name='dont_package',
|
||||||
|
recommended_value=False, level=OptionRecommendation.LOW,
|
||||||
|
help=_('Normally this input plugin re-arranges all the input '
|
||||||
|
'files into a standard folder hierarchy. Only use this option '
|
||||||
|
'if you know what you are doing as it can result in various '
|
||||||
|
'nasty side effects in the rest of of the conversion pipeline.'
|
||||||
|
)
|
||||||
|
),
|
||||||
])
|
])
|
||||||
|
|
||||||
def convert(self, stream, opts, file_ext, log,
|
def convert(self, stream, opts, file_ext, log,
|
||||||
@ -276,6 +284,9 @@ class HTMLInput(InputFormatPlugin):
|
|||||||
mi.render(open('metadata.opf', 'wb'))
|
mi.render(open('metadata.opf', 'wb'))
|
||||||
opfpath = os.path.abspath('metadata.opf')
|
opfpath = os.path.abspath('metadata.opf')
|
||||||
|
|
||||||
|
if opts.dont_package:
|
||||||
|
return opfpath
|
||||||
|
|
||||||
from calibre.ebooks.conversion.plumber import create_oebbook
|
from calibre.ebooks.conversion.plumber import create_oebbook
|
||||||
oeb = create_oebbook(log, opfpath, opts)
|
oeb = create_oebbook(log, opfpath, opts)
|
||||||
|
|
||||||
|
24
src/calibre/ebooks/lit/input.py
Normal file
24
src/calibre/ebooks/lit/input.py
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
from __future__ import with_statement
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
from calibre.customize.conversion import InputFormatPlugin
|
||||||
|
|
||||||
|
class LITInput(InputFormatPlugin):
|
||||||
|
|
||||||
|
name = 'LIT Input'
|
||||||
|
author = 'Marshall T. Vandegrift'
|
||||||
|
description = 'Convert LIT files to HTML'
|
||||||
|
file_types = set(['lit'])
|
||||||
|
|
||||||
|
def convert(self, stream, options, file_ext, log,
|
||||||
|
accelerators):
|
||||||
|
from calibre.ebooks.lit.reader import LitReader
|
||||||
|
from calibre.ebooks.conversion.plumber import create_oebbook
|
||||||
|
return create_oebbook(log, stream, options, reader=LitReader)
|
||||||
|
|
||||||
|
|
@ -7,13 +7,12 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> ' \
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> ' \
|
||||||
'and Marshall T. Vandegrift <llasram@gmail.com>'
|
'and Marshall T. Vandegrift <llasram@gmail.com>'
|
||||||
|
|
||||||
import sys, struct, os
|
import struct, os
|
||||||
import functools
|
import functools
|
||||||
import re
|
import re
|
||||||
from urlparse import urldefrag
|
from urlparse import urldefrag
|
||||||
from cStringIO import StringIO
|
from cStringIO import StringIO
|
||||||
from urllib import unquote as urlunquote
|
from urllib import unquote as urlunquote
|
||||||
from lxml import etree
|
|
||||||
from calibre.ebooks.lit import LitError
|
from calibre.ebooks.lit import LitError
|
||||||
from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
|
from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
|
||||||
import calibre.ebooks.lit.mssha1 as mssha1
|
import calibre.ebooks.lit.mssha1 as mssha1
|
||||||
@ -29,12 +28,12 @@ __all__ = ["LitReader"]
|
|||||||
XML_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
|
XML_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
|
||||||
"""
|
"""
|
||||||
OPF_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
|
OPF_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
|
||||||
<!DOCTYPE package
|
<!DOCTYPE package
|
||||||
PUBLIC "+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Package//EN"
|
PUBLIC "+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Package//EN"
|
||||||
"http://openebook.org/dtds/oeb-1.0.1/oebpkg101.dtd">
|
"http://openebook.org/dtds/oeb-1.0.1/oebpkg101.dtd">
|
||||||
"""
|
"""
|
||||||
HTML_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
|
HTML_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
|
||||||
<!DOCTYPE html PUBLIC
|
<!DOCTYPE html PUBLIC
|
||||||
"+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Document//EN"
|
"+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Document//EN"
|
||||||
"http://openebook.org/dtds/oeb-1.0.1/oebdoc101.dtd">
|
"http://openebook.org/dtds/oeb-1.0.1/oebdoc101.dtd">
|
||||||
"""
|
"""
|
||||||
@ -73,7 +72,7 @@ def encint(bytes, remaining):
|
|||||||
val <<= 7
|
val <<= 7
|
||||||
val |= (b & 0x7f)
|
val |= (b & 0x7f)
|
||||||
if b & 0x80 == 0: break
|
if b & 0x80 == 0: break
|
||||||
return val, bytes[pos:], remaining
|
return val, bytes[pos:], remaining
|
||||||
|
|
||||||
def msguid(bytes):
|
def msguid(bytes):
|
||||||
values = struct.unpack("<LHHBBBBBBBB", bytes[:16])
|
values = struct.unpack("<LHHBBBBBBBB", bytes[:16])
|
||||||
@ -123,7 +122,7 @@ class UnBinary(object):
|
|||||||
CLOSE_ANGLE_RE = re.compile(r'(?<!--)>>(?=>>|[^>])')
|
CLOSE_ANGLE_RE = re.compile(r'(?<!--)>>(?=>>|[^>])')
|
||||||
DOUBLE_ANGLE_RE = re.compile(r'([<>])\1')
|
DOUBLE_ANGLE_RE = re.compile(r'([<>])\1')
|
||||||
EMPTY_ATOMS = ({},{})
|
EMPTY_ATOMS = ({},{})
|
||||||
|
|
||||||
def __init__(self, bin, path, manifest={}, map=HTML_MAP, atoms=EMPTY_ATOMS):
|
def __init__(self, bin, path, manifest={}, map=HTML_MAP, atoms=EMPTY_ATOMS):
|
||||||
self.manifest = manifest
|
self.manifest = manifest
|
||||||
self.tag_map, self.attr_map, self.tag_to_attr_map = map
|
self.tag_map, self.attr_map, self.tag_to_attr_map = map
|
||||||
@ -143,7 +142,7 @@ class UnBinary(object):
|
|||||||
raw = self.CLOSE_ANGLE_RE.sub(r'>', raw)
|
raw = self.CLOSE_ANGLE_RE.sub(r'>', raw)
|
||||||
raw = self.DOUBLE_ANGLE_RE.sub(r'\1', raw)
|
raw = self.DOUBLE_ANGLE_RE.sub(r'\1', raw)
|
||||||
self.raw = raw
|
self.raw = raw
|
||||||
|
|
||||||
def item_path(self, internal_id):
|
def item_path(self, internal_id):
|
||||||
try:
|
try:
|
||||||
target = self.manifest[internal_id].path
|
target = self.manifest[internal_id].path
|
||||||
@ -159,7 +158,7 @@ class UnBinary(object):
|
|||||||
index += 1
|
index += 1
|
||||||
relpath = (['..'] * (len(base) - index)) + target[index:]
|
relpath = (['..'] * (len(base) - index)) + target[index:]
|
||||||
return '/'.join(relpath)
|
return '/'.join(relpath)
|
||||||
|
|
||||||
def __unicode__(self):
|
def __unicode__(self):
|
||||||
return self.raw.decode('utf-8')
|
return self.raw.decode('utf-8')
|
||||||
|
|
||||||
@ -172,11 +171,11 @@ class UnBinary(object):
|
|||||||
in_censorship = is_goingdown = False
|
in_censorship = is_goingdown = False
|
||||||
state = 'text'
|
state = 'text'
|
||||||
flags = 0
|
flags = 0
|
||||||
|
|
||||||
while index < len(bin):
|
while index < len(bin):
|
||||||
c, index = read_utf8_char(bin, index)
|
c, index = read_utf8_char(bin, index)
|
||||||
oc = ord(c)
|
oc = ord(c)
|
||||||
|
|
||||||
if state == 'text':
|
if state == 'text':
|
||||||
if oc == 0:
|
if oc == 0:
|
||||||
state = 'get flags'
|
state = 'get flags'
|
||||||
@ -188,14 +187,14 @@ class UnBinary(object):
|
|||||||
elif c == '<':
|
elif c == '<':
|
||||||
c = '<<'
|
c = '<<'
|
||||||
buf.write(encode(c))
|
buf.write(encode(c))
|
||||||
|
|
||||||
elif state == 'get flags':
|
elif state == 'get flags':
|
||||||
if oc == 0:
|
if oc == 0:
|
||||||
state = 'text'
|
state = 'text'
|
||||||
continue
|
continue
|
||||||
flags = oc
|
flags = oc
|
||||||
state = 'get tag'
|
state = 'get tag'
|
||||||
|
|
||||||
elif state == 'get tag':
|
elif state == 'get tag':
|
||||||
state = 'text' if oc == 0 else 'get attr'
|
state = 'text' if oc == 0 else 'get attr'
|
||||||
if flags & FLAG_OPENING:
|
if flags & FLAG_OPENING:
|
||||||
@ -226,7 +225,7 @@ class UnBinary(object):
|
|||||||
if depth == 0:
|
if depth == 0:
|
||||||
raise LitError('Extra closing tag')
|
raise LitError('Extra closing tag')
|
||||||
return index
|
return index
|
||||||
|
|
||||||
elif state == 'get attr':
|
elif state == 'get attr':
|
||||||
in_censorship = False
|
in_censorship = False
|
||||||
if oc == 0:
|
if oc == 0:
|
||||||
@ -265,7 +264,7 @@ class UnBinary(object):
|
|||||||
state = 'get href length'
|
state = 'get href length'
|
||||||
else:
|
else:
|
||||||
state = 'get value length'
|
state = 'get value length'
|
||||||
|
|
||||||
elif state == 'get value length':
|
elif state == 'get value length':
|
||||||
if not in_censorship:
|
if not in_censorship:
|
||||||
buf.write('"')
|
buf.write('"')
|
||||||
@ -281,7 +280,7 @@ class UnBinary(object):
|
|||||||
continue
|
continue
|
||||||
if count < 0 or count > (len(bin) - index):
|
if count < 0 or count > (len(bin) - index):
|
||||||
raise LitError('Invalid character count %d' % count)
|
raise LitError('Invalid character count %d' % count)
|
||||||
|
|
||||||
elif state == 'get value':
|
elif state == 'get value':
|
||||||
if count == 0xfffe:
|
if count == 0xfffe:
|
||||||
if not in_censorship:
|
if not in_censorship:
|
||||||
@ -301,7 +300,7 @@ class UnBinary(object):
|
|||||||
buf.write('"')
|
buf.write('"')
|
||||||
in_censorship = False
|
in_censorship = False
|
||||||
state = 'get attr'
|
state = 'get attr'
|
||||||
|
|
||||||
elif state == 'get custom length':
|
elif state == 'get custom length':
|
||||||
count = oc - 1
|
count = oc - 1
|
||||||
if count <= 0 or count > len(bin)-index:
|
if count <= 0 or count > len(bin)-index:
|
||||||
@ -309,21 +308,21 @@ class UnBinary(object):
|
|||||||
dynamic_tag += 1
|
dynamic_tag += 1
|
||||||
state = 'get custom'
|
state = 'get custom'
|
||||||
tag_name = ''
|
tag_name = ''
|
||||||
|
|
||||||
elif state == 'get custom':
|
elif state == 'get custom':
|
||||||
tag_name += c
|
tag_name += c
|
||||||
count -= 1
|
count -= 1
|
||||||
if count == 0:
|
if count == 0:
|
||||||
buf.write(encode(tag_name))
|
buf.write(encode(tag_name))
|
||||||
state = 'get attr'
|
state = 'get attr'
|
||||||
|
|
||||||
elif state == 'get attr length':
|
elif state == 'get attr length':
|
||||||
count = oc - 1
|
count = oc - 1
|
||||||
if count <= 0 or count > (len(bin) - index):
|
if count <= 0 or count > (len(bin) - index):
|
||||||
raise LitError('Invalid character count %d' % count)
|
raise LitError('Invalid character count %d' % count)
|
||||||
buf.write(' ')
|
buf.write(' ')
|
||||||
state = 'get custom attr'
|
state = 'get custom attr'
|
||||||
|
|
||||||
elif state == 'get custom attr':
|
elif state == 'get custom attr':
|
||||||
buf.write(encode(c))
|
buf.write(encode(c))
|
||||||
count -= 1
|
count -= 1
|
||||||
@ -337,7 +336,7 @@ class UnBinary(object):
|
|||||||
raise LitError('Invalid character count %d' % count)
|
raise LitError('Invalid character count %d' % count)
|
||||||
href = ''
|
href = ''
|
||||||
state = 'get href'
|
state = 'get href'
|
||||||
|
|
||||||
elif state == 'get href':
|
elif state == 'get href':
|
||||||
href += c
|
href += c
|
||||||
count -= 1
|
count -= 1
|
||||||
@ -350,7 +349,7 @@ class UnBinary(object):
|
|||||||
buf.write(encode(u'"%s"' % path))
|
buf.write(encode(u'"%s"' % path))
|
||||||
state = 'get attr'
|
state = 'get attr'
|
||||||
return index
|
return index
|
||||||
|
|
||||||
|
|
||||||
class DirectoryEntry(object):
|
class DirectoryEntry(object):
|
||||||
def __init__(self, name, section, offset, size):
|
def __init__(self, name, section, offset, size):
|
||||||
@ -358,11 +357,11 @@ class DirectoryEntry(object):
|
|||||||
self.section = section
|
self.section = section
|
||||||
self.offset = offset
|
self.offset = offset
|
||||||
self.size = size
|
self.size = size
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return "DirectoryEntry(name=%s, section=%d, offset=%d, size=%d)" \
|
return "DirectoryEntry(name=%s, section=%d, offset=%d, size=%d)" \
|
||||||
% (repr(self.name), self.section, self.offset, self.size)
|
% (repr(self.name), self.section, self.offset, self.size)
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return repr(self)
|
return repr(self)
|
||||||
|
|
||||||
@ -382,12 +381,12 @@ class ManifestItem(object):
|
|||||||
path = os.path.normpath(path).replace('\\', '/')
|
path = os.path.normpath(path).replace('\\', '/')
|
||||||
while path.startswith('../'): path = path[3:]
|
while path.startswith('../'): path = path[3:]
|
||||||
self.path = path
|
self.path = path
|
||||||
|
|
||||||
def __eq__(self, other):
|
def __eq__(self, other):
|
||||||
if hasattr(other, 'internal'):
|
if hasattr(other, 'internal'):
|
||||||
return self.internal == other.internal
|
return self.internal == other.internal
|
||||||
return self.internal == other
|
return self.internal == other
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return "ManifestItem(internal=%r, path=%r, mime_type=%r, " \
|
return "ManifestItem(internal=%r, path=%r, mime_type=%r, " \
|
||||||
"offset=%d, root=%r, state=%r)" \
|
"offset=%d, root=%r, state=%r)" \
|
||||||
@ -404,7 +403,7 @@ def preserve(function):
|
|||||||
self.stream.seek(opos)
|
self.stream.seek(opos)
|
||||||
functools.update_wrapper(wrapper, function)
|
functools.update_wrapper(wrapper, function)
|
||||||
return wrapper
|
return wrapper
|
||||||
|
|
||||||
class LitFile(object):
|
class LitFile(object):
|
||||||
PIECE_SIZE = 16
|
PIECE_SIZE = 16
|
||||||
|
|
||||||
@ -438,14 +437,14 @@ class LitFile(object):
|
|||||||
return self.stream.read(8)
|
return self.stream.read(8)
|
||||||
return property(fget=fget)
|
return property(fget=fget)
|
||||||
magic = magic()
|
magic = magic()
|
||||||
|
|
||||||
def version():
|
def version():
|
||||||
def fget(self):
|
def fget(self):
|
||||||
self.stream.seek(8)
|
self.stream.seek(8)
|
||||||
return u32(self.stream.read(4))
|
return u32(self.stream.read(4))
|
||||||
return property(fget=fget)
|
return property(fget=fget)
|
||||||
version = version()
|
version = version()
|
||||||
|
|
||||||
def hdr_len():
|
def hdr_len():
|
||||||
@preserve
|
@preserve
|
||||||
def fget(self):
|
def fget(self):
|
||||||
@ -453,7 +452,7 @@ class LitFile(object):
|
|||||||
return int32(self.stream.read(4))
|
return int32(self.stream.read(4))
|
||||||
return property(fget=fget)
|
return property(fget=fget)
|
||||||
hdr_len = hdr_len()
|
hdr_len = hdr_len()
|
||||||
|
|
||||||
def num_pieces():
|
def num_pieces():
|
||||||
@preserve
|
@preserve
|
||||||
def fget(self):
|
def fget(self):
|
||||||
@ -461,7 +460,7 @@ class LitFile(object):
|
|||||||
return int32(self.stream.read(4))
|
return int32(self.stream.read(4))
|
||||||
return property(fget=fget)
|
return property(fget=fget)
|
||||||
num_pieces = num_pieces()
|
num_pieces = num_pieces()
|
||||||
|
|
||||||
def sec_hdr_len():
|
def sec_hdr_len():
|
||||||
@preserve
|
@preserve
|
||||||
def fget(self):
|
def fget(self):
|
||||||
@ -469,7 +468,7 @@ class LitFile(object):
|
|||||||
return int32(self.stream.read(4))
|
return int32(self.stream.read(4))
|
||||||
return property(fget=fget)
|
return property(fget=fget)
|
||||||
sec_hdr_len = sec_hdr_len()
|
sec_hdr_len = sec_hdr_len()
|
||||||
|
|
||||||
def guid():
|
def guid():
|
||||||
@preserve
|
@preserve
|
||||||
def fget(self):
|
def fget(self):
|
||||||
@ -477,7 +476,7 @@ class LitFile(object):
|
|||||||
return self.stream.read(16)
|
return self.stream.read(16)
|
||||||
return property(fget=fget)
|
return property(fget=fget)
|
||||||
guid = guid()
|
guid = guid()
|
||||||
|
|
||||||
def header():
|
def header():
|
||||||
@preserve
|
@preserve
|
||||||
def fget(self):
|
def fget(self):
|
||||||
@ -488,7 +487,7 @@ class LitFile(object):
|
|||||||
return self.stream.read(size)
|
return self.stream.read(size)
|
||||||
return property(fget=fget)
|
return property(fget=fget)
|
||||||
header = header()
|
header = header()
|
||||||
|
|
||||||
@preserve
|
@preserve
|
||||||
def __len__(self):
|
def __len__(self):
|
||||||
self.stream.seek(0, 2)
|
self.stream.seek(0, 2)
|
||||||
@ -501,7 +500,7 @@ class LitFile(object):
|
|||||||
|
|
||||||
def read_content(self, offset, size):
|
def read_content(self, offset, size):
|
||||||
return self.read_raw(self.content_offset + offset, size)
|
return self.read_raw(self.content_offset + offset, size)
|
||||||
|
|
||||||
def read_secondary_header(self):
|
def read_secondary_header(self):
|
||||||
offset = self.hdr_len + (self.num_pieces * self.PIECE_SIZE)
|
offset = self.hdr_len + (self.num_pieces * self.PIECE_SIZE)
|
||||||
bytes = self.read_raw(offset, self.sec_hdr_len)
|
bytes = self.read_raw(offset, self.sec_hdr_len)
|
||||||
@ -526,12 +525,12 @@ class LitFile(object):
|
|||||||
if u32(bytes[offset+4+16:]):
|
if u32(bytes[offset+4+16:]):
|
||||||
raise LitError('This file has a 64bit content offset')
|
raise LitError('This file has a 64bit content offset')
|
||||||
self.content_offset = u32(bytes[offset+16:])
|
self.content_offset = u32(bytes[offset+16:])
|
||||||
self.timestamp = u32(bytes[offset+24:])
|
self.timestamp = u32(bytes[offset+24:])
|
||||||
self.language_id = u32(bytes[offset+28:])
|
self.language_id = u32(bytes[offset+28:])
|
||||||
offset += 48
|
offset += 48
|
||||||
if not hasattr(self, 'content_offset'):
|
if not hasattr(self, 'content_offset'):
|
||||||
raise LitError('Could not figure out the content offset')
|
raise LitError('Could not figure out the content offset')
|
||||||
|
|
||||||
def read_header_pieces(self):
|
def read_header_pieces(self):
|
||||||
src = self.header[self.hdr_len:]
|
src = self.header[self.hdr_len:]
|
||||||
for i in xrange(self.num_pieces):
|
for i in xrange(self.num_pieces):
|
||||||
@ -556,7 +555,7 @@ class LitFile(object):
|
|||||||
self.piece3_guid = piece
|
self.piece3_guid = piece
|
||||||
elif i == 4:
|
elif i == 4:
|
||||||
self.piece4_guid = piece
|
self.piece4_guid = piece
|
||||||
|
|
||||||
def read_directory(self, piece):
|
def read_directory(self, piece):
|
||||||
if not piece.startswith('IFCM'):
|
if not piece.startswith('IFCM'):
|
||||||
raise LitError('Header piece #1 is not main directory.')
|
raise LitError('Header piece #1 is not main directory.')
|
||||||
@ -760,9 +759,9 @@ class LitFile(object):
|
|||||||
raise LitError("Reset table is too short")
|
raise LitError("Reset table is too short")
|
||||||
if u32(reset_table[RESET_UCLENGTH + 4:]) != 0:
|
if u32(reset_table[RESET_UCLENGTH + 4:]) != 0:
|
||||||
raise LitError("Reset table has 64bit value for UCLENGTH")
|
raise LitError("Reset table has 64bit value for UCLENGTH")
|
||||||
|
|
||||||
result = []
|
result = []
|
||||||
|
|
||||||
window_size = 14
|
window_size = 14
|
||||||
u = u32(control[CONTROL_WINDOW_SIZE:])
|
u = u32(control[CONTROL_WINDOW_SIZE:])
|
||||||
while u > 0:
|
while u > 0:
|
||||||
@ -847,13 +846,13 @@ class LitContainer(object):
|
|||||||
|
|
||||||
def __init__(self, filename_or_stream):
|
def __init__(self, filename_or_stream):
|
||||||
self._litfile = LitFile(filename_or_stream)
|
self._litfile = LitFile(filename_or_stream)
|
||||||
|
|
||||||
def namelist(self):
|
def namelist(self):
|
||||||
return self._litfile.paths.keys()
|
return self._litfile.paths.keys()
|
||||||
|
|
||||||
def exists(self, name):
|
def exists(self, name):
|
||||||
return urlunquote(name) in self._litfile.paths
|
return urlunquote(name) in self._litfile.paths
|
||||||
|
|
||||||
def read(self, name):
|
def read(self, name):
|
||||||
entry = self._litfile.paths[urlunquote(name)] if name else None
|
entry = self._litfile.paths[urlunquote(name)] if name else None
|
||||||
if entry is None:
|
if entry is None:
|
||||||
@ -869,7 +868,7 @@ class LitContainer(object):
|
|||||||
internal = '/'.join(('/data', entry.internal))
|
internal = '/'.join(('/data', entry.internal))
|
||||||
content = self._litfile.get_file(internal)
|
content = self._litfile.get_file(internal)
|
||||||
return content
|
return content
|
||||||
|
|
||||||
def _read_meta(self):
|
def _read_meta(self):
|
||||||
path = 'content.opf'
|
path = 'content.opf'
|
||||||
raw = self._litfile.get_file('/meta')
|
raw = self._litfile.get_file('/meta')
|
||||||
|
@ -272,11 +272,7 @@ def XPath(expr):
|
|||||||
def xpath(elem, expr):
|
def xpath(elem, expr):
|
||||||
return elem.xpath(expr, namespaces=XPNSMAP)
|
return elem.xpath(expr, namespaces=XPNSMAP)
|
||||||
|
|
||||||
def _prepare_xml_for_serialization(root):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def xml2str(root, pretty_print=False, strip_comments=False):
|
def xml2str(root, pretty_print=False, strip_comments=False):
|
||||||
_prepare_xml_for_serialization(root)
|
|
||||||
ans = etree.tostring(root, encoding='utf-8', xml_declaration=True,
|
ans = etree.tostring(root, encoding='utf-8', xml_declaration=True,
|
||||||
pretty_print=pretty_print)
|
pretty_print=pretty_print)
|
||||||
|
|
||||||
@ -287,7 +283,6 @@ def xml2str(root, pretty_print=False, strip_comments=False):
|
|||||||
|
|
||||||
|
|
||||||
def xml2unicode(root, pretty_print=False):
|
def xml2unicode(root, pretty_print=False):
|
||||||
_prepare_xml_for_serialization(root)
|
|
||||||
return etree.tostring(root, pretty_print=pretty_print)
|
return etree.tostring(root, pretty_print=pretty_print)
|
||||||
|
|
||||||
ASCII_CHARS = set(chr(x) for x in xrange(128))
|
ASCII_CHARS = set(chr(x) for x in xrange(128))
|
||||||
@ -321,6 +316,25 @@ def urlnormalize(href):
|
|||||||
parts = (urlquote(part) for part in parts)
|
parts = (urlquote(part) for part in parts)
|
||||||
return urlunparse(parts)
|
return urlunparse(parts)
|
||||||
|
|
||||||
|
class DummyHandler(logging.Handler):
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
logging.Handler.__init__(self, logging.WARNING)
|
||||||
|
self.setFormatter(logging.Formatter('%(message)s'))
|
||||||
|
self.log = None
|
||||||
|
|
||||||
|
def emit(self, record):
|
||||||
|
if self.log is not None:
|
||||||
|
msg = self.format(record)
|
||||||
|
f = self.log.error if record.levelno >= logging.ERROR \
|
||||||
|
else self.log.warn
|
||||||
|
f(msg)
|
||||||
|
|
||||||
|
|
||||||
|
_css_logger = logging.getLogger('calibre.css')
|
||||||
|
_css_logger.setLevel(logging.WARNING)
|
||||||
|
_css_log_handler = DummyHandler()
|
||||||
|
_css_logger.addHandler(_css_log_handler)
|
||||||
|
|
||||||
class OEBError(Exception):
|
class OEBError(Exception):
|
||||||
"""Generic OEB-processing error."""
|
"""Generic OEB-processing error."""
|
||||||
@ -778,7 +792,8 @@ class Manifest(object):
|
|||||||
data = self.oeb.css_preprocessor(data)
|
data = self.oeb.css_preprocessor(data)
|
||||||
data = XHTML_CSS_NAMESPACE + data
|
data = XHTML_CSS_NAMESPACE + data
|
||||||
parser = CSSParser(loglevel=logging.WARNING,
|
parser = CSSParser(loglevel=logging.WARNING,
|
||||||
fetcher=self._fetch_css)
|
fetcher=self._fetch_css,
|
||||||
|
log=_css_logger)
|
||||||
data = parser.parseString(data, href=self.href)
|
data = parser.parseString(data, href=self.href)
|
||||||
data.namespaces['h'] = XHTML_NS
|
data.namespaces['h'] = XHTML_NS
|
||||||
return data
|
return data
|
||||||
@ -1435,7 +1450,7 @@ class OEBBook(object):
|
|||||||
:attr:`pages`: List of "pages," such as indexed to a print edition of
|
:attr:`pages`: List of "pages," such as indexed to a print edition of
|
||||||
the same text.
|
the same text.
|
||||||
"""
|
"""
|
||||||
|
_css_log_handler.log = logger
|
||||||
self.encoding = encoding
|
self.encoding = encoding
|
||||||
self.html_preprocessor = html_preprocessor
|
self.html_preprocessor = html_preprocessor
|
||||||
self.css_preprocessor = css_preprocessor
|
self.css_preprocessor = css_preprocessor
|
||||||
@ -1450,6 +1465,7 @@ class OEBBook(object):
|
|||||||
self.guide = Guide(self)
|
self.guide = Guide(self)
|
||||||
self.toc = TOC()
|
self.toc = TOC()
|
||||||
self.pages = PageList()
|
self.pages = PageList()
|
||||||
|
self.auto_generated_toc = True
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def generate(cls, opts):
|
def generate(cls, opts):
|
||||||
|
@ -13,13 +13,12 @@ from PyQt4.Qt import QFontDatabase
|
|||||||
|
|
||||||
from calibre.customize.ui import available_input_formats
|
from calibre.customize.ui import available_input_formats
|
||||||
from calibre.ebooks.epub.from_html import TITLEPAGE
|
from calibre.ebooks.epub.from_html import TITLEPAGE
|
||||||
from calibre.ebooks.metadata.opf2 import OPF, OPFCreator
|
from calibre.ebooks.metadata.opf2 import OPF
|
||||||
from calibre.ptempfile import TemporaryDirectory
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
from calibre.ebooks.chardet import xml_to_unicode
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
from calibre.utils.zipfile import safe_replace, ZipFile
|
from calibre.utils.zipfile import safe_replace, ZipFile
|
||||||
from calibre.utils.config import DynamicConfig
|
from calibre.utils.config import DynamicConfig
|
||||||
from calibre.utils.logging import Log
|
from calibre.utils.logging import Log
|
||||||
from calibre import CurrentDir
|
|
||||||
|
|
||||||
def character_count(html):
|
def character_count(html):
|
||||||
'''
|
'''
|
||||||
@ -57,31 +56,21 @@ class FakeOpts(object):
|
|||||||
max_levels = 5
|
max_levels = 5
|
||||||
input_encoding = None
|
input_encoding = None
|
||||||
|
|
||||||
def html2opf(path, tdir, log):
|
|
||||||
from calibre.ebooks.html.input import get_filelist
|
|
||||||
from calibre.ebooks.metadata.meta import get_metadata
|
|
||||||
with CurrentDir(tdir):
|
|
||||||
fl = get_filelist(path, tdir, FakeOpts(), log)
|
|
||||||
mi = get_metadata(open(path, 'rb'), 'html')
|
|
||||||
mi = OPFCreator(os.getcwdu(), mi)
|
|
||||||
mi.guide = None
|
|
||||||
entries = [(f.path, 'application/xhtml+xml') for f in fl]
|
|
||||||
mi.create_manifest(entries)
|
|
||||||
mi.create_spine([f.path for f in fl])
|
|
||||||
|
|
||||||
mi.render(open('metadata.opf', 'wb'))
|
|
||||||
opfpath = os.path.abspath('metadata.opf')
|
|
||||||
|
|
||||||
return opfpath
|
|
||||||
|
|
||||||
def opf2opf(path, tdir, opts):
|
|
||||||
return path
|
|
||||||
|
|
||||||
def is_supported(path):
|
def is_supported(path):
|
||||||
ext = os.path.splitext(path)[1].replace('.', '').lower()
|
ext = os.path.splitext(path)[1].replace('.', '').lower()
|
||||||
ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
|
ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
|
||||||
return ext in available_input_formats()
|
return ext in available_input_formats()
|
||||||
|
|
||||||
|
|
||||||
|
def write_oebbook(oeb, path):
|
||||||
|
from calibre.ebooks.oeb.writer import OEBWriter
|
||||||
|
from calibre import walk
|
||||||
|
w = OEBWriter()
|
||||||
|
w(oeb, path)
|
||||||
|
for f in walk(path):
|
||||||
|
if f.endswith('.opf'):
|
||||||
|
return f
|
||||||
|
|
||||||
class EbookIterator(object):
|
class EbookIterator(object):
|
||||||
|
|
||||||
CHARACTERS_PER_PAGE = 1000
|
CHARACTERS_PER_PAGE = 1000
|
||||||
@ -131,17 +120,16 @@ class EbookIterator(object):
|
|||||||
def __enter__(self):
|
def __enter__(self):
|
||||||
self._tdir = TemporaryDirectory('_ebook_iter')
|
self._tdir = TemporaryDirectory('_ebook_iter')
|
||||||
self.base = self._tdir.__enter__()
|
self.base = self._tdir.__enter__()
|
||||||
if self.ebook_ext == 'opf':
|
from calibre.ebooks.conversion.plumber import Plumber
|
||||||
self.pathtoopf = self.pathtoebook
|
plumber = Plumber(self.pathtoebook, self.base, self.log)
|
||||||
elif self.ebook_ext == 'html':
|
plumber.setup_options()
|
||||||
self.pathtoopf = html2opf(self.pathtoebook, self.base, self.log)
|
if hasattr(plumber.opts, 'dont_package'):
|
||||||
else:
|
plumber.opts.dont_package = True
|
||||||
from calibre.ebooks.conversion.plumber import Plumber
|
self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
|
||||||
plumber = Plumber(self.pathtoebook, self.base, self.log)
|
plumber.opts, plumber.input_fmt, self.log,
|
||||||
plumber.setup_options()
|
{}, self.base)
|
||||||
self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
|
if hasattr(self.pathtoopf, 'manifest'):
|
||||||
plumber.opts, plumber.input_fmt, self.log,
|
self.pathtoopf = write_oebbook(self.pathtoebook, self._tdir)
|
||||||
{}, self.base)
|
|
||||||
|
|
||||||
|
|
||||||
self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
|
self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
|
||||||
|
@ -16,7 +16,6 @@ class OEBOutput(OutputFormatPlugin):
|
|||||||
author = 'Kovid Goyal'
|
author = 'Kovid Goyal'
|
||||||
file_type = 'oeb'
|
file_type = 'oeb'
|
||||||
|
|
||||||
|
|
||||||
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
||||||
self.log, self.opts = log, opts
|
self.log, self.opts = log, opts
|
||||||
if not os.path.exists(output_path):
|
if not os.path.exists(output_path):
|
||||||
|
@ -349,6 +349,7 @@ class OEBReader(object):
|
|||||||
def _toc_from_ncx(self, item):
|
def _toc_from_ncx(self, item):
|
||||||
if item is None:
|
if item is None:
|
||||||
return False
|
return False
|
||||||
|
self.log.debug('Reading TOC from NCX...')
|
||||||
ncx = item.data
|
ncx = item.data
|
||||||
title = ''.join(xpath(ncx, 'ncx:docTitle/ncx:text/text()'))
|
title = ''.join(xpath(ncx, 'ncx:docTitle/ncx:text/text()'))
|
||||||
title = COLLAPSE_RE.sub(' ', title.strip())
|
title = COLLAPSE_RE.sub(' ', title.strip())
|
||||||
@ -364,6 +365,7 @@ class OEBReader(object):
|
|||||||
result = xpath(opf, 'o2:tours/o2:tour')
|
result = xpath(opf, 'o2:tours/o2:tour')
|
||||||
if not result:
|
if not result:
|
||||||
return False
|
return False
|
||||||
|
self.log.debug('Reading TOC from tour...')
|
||||||
tour = result[0]
|
tour = result[0]
|
||||||
toc = self.oeb.toc
|
toc = self.oeb.toc
|
||||||
toc.title = tour.get('title')
|
toc.title = tour.get('title')
|
||||||
@ -384,6 +386,7 @@ class OEBReader(object):
|
|||||||
def _toc_from_html(self, opf):
|
def _toc_from_html(self, opf):
|
||||||
if 'toc' not in self.oeb.guide:
|
if 'toc' not in self.oeb.guide:
|
||||||
return False
|
return False
|
||||||
|
self.log.debug('Reading TOC from HTML...')
|
||||||
itempath, frag = urldefrag(self.oeb.guide['toc'].href)
|
itempath, frag = urldefrag(self.oeb.guide['toc'].href)
|
||||||
item = self.oeb.manifest.hrefs[itempath]
|
item = self.oeb.manifest.hrefs[itempath]
|
||||||
html = item.data
|
html = item.data
|
||||||
@ -414,6 +417,7 @@ class OEBReader(object):
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
def _toc_from_spine(self, opf):
|
def _toc_from_spine(self, opf):
|
||||||
|
self.log.warn('Generating default TOC from spine...')
|
||||||
toc = self.oeb.toc
|
toc = self.oeb.toc
|
||||||
titles = []
|
titles = []
|
||||||
headers = []
|
headers = []
|
||||||
@ -441,11 +445,14 @@ class OEBReader(object):
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
def _toc_from_opf(self, opf, item):
|
def _toc_from_opf(self, opf, item):
|
||||||
|
self.oeb.auto_generated_toc = False
|
||||||
if self._toc_from_ncx(item): return
|
if self._toc_from_ncx(item): return
|
||||||
if self._toc_from_tour(opf): return
|
# Prefer HTML to tour based TOC, since several LIT files
|
||||||
self.logger.warn('No metadata table of contents found')
|
# have good HTML TOCs but bad tour based TOCs
|
||||||
if self._toc_from_html(opf): return
|
if self._toc_from_html(opf): return
|
||||||
|
if self._toc_from_tour(opf): return
|
||||||
self._toc_from_spine(opf)
|
self._toc_from_spine(opf)
|
||||||
|
self.oeb.auto_generated_toc = True
|
||||||
|
|
||||||
def _pages_from_ncx(self, opf, item):
|
def _pages_from_ncx(self, opf, item):
|
||||||
if item is None:
|
if item is None:
|
||||||
|
@ -51,8 +51,8 @@ class Split(object):
|
|||||||
self.log = oeb.log
|
self.log = oeb.log
|
||||||
self.map = {}
|
self.map = {}
|
||||||
self.page_break_selectors = None
|
self.page_break_selectors = None
|
||||||
for item in self.oeb.manifest.items:
|
for item in list(self.oeb.manifest.items):
|
||||||
if etree.iselement(item.data):
|
if item.spine_position is not None and etree.iselement(item.data):
|
||||||
self.split_item(item)
|
self.split_item(item)
|
||||||
|
|
||||||
self.fix_links()
|
self.fix_links()
|
||||||
@ -74,31 +74,34 @@ class Split(object):
|
|||||||
self.page_break_selectors = set([])
|
self.page_break_selectors = set([])
|
||||||
stylesheets = [x.data for x in self.oeb.manifest if x.media_type in
|
stylesheets = [x.data for x in self.oeb.manifest if x.media_type in
|
||||||
OEB_STYLES]
|
OEB_STYLES]
|
||||||
page_break_selectors = set([])
|
for rule in rules(stylesheets):
|
||||||
for rule in rules(stylesheets):
|
before = getattr(rule.style.getPropertyCSSValue(
|
||||||
before = getattr(rule.style.getPropertyCSSValue(
|
'page-break-before'), 'cssText', '').strip().lower()
|
||||||
'page-break-before'), 'cssText', '').strip().lower()
|
after = getattr(rule.style.getPropertyCSSValue(
|
||||||
after = getattr(rule.style.getPropertyCSSValue(
|
'page-break-after'), 'cssText', '').strip().lower()
|
||||||
'page-break-after'), 'cssText', '').strip().lower()
|
try:
|
||||||
try:
|
if before and before != 'avoid':
|
||||||
if before and before != 'avoid':
|
self.page_break_selectors.add((CSSSelector(rule.selectorText),
|
||||||
page_break_selectors.add((CSSSelector(rule.selectorText),
|
True))
|
||||||
True))
|
except:
|
||||||
except:
|
pass
|
||||||
pass
|
try:
|
||||||
try:
|
if after and after != 'avoid':
|
||||||
if after and after != 'avoid':
|
self.page_break_selectors.add((CSSSelector(rule.selectorText),
|
||||||
page_break_selectors.add((CSSSelector(rule.selectorText),
|
False))
|
||||||
False))
|
except:
|
||||||
except:
|
pass
|
||||||
pass
|
|
||||||
|
|
||||||
page_breaks = set([])
|
page_breaks = set([])
|
||||||
for selector, before in page_break_selectors:
|
for selector, before in self.page_break_selectors:
|
||||||
for elem in selector(item.data):
|
body = item.data.xpath('//h:body', namespaces=NAMESPACES)
|
||||||
if before:
|
if not body:
|
||||||
elem.set('pb_before', '1')
|
continue
|
||||||
page_breaks.add(elem)
|
for elem in selector(body[0]):
|
||||||
|
if elem not in body:
|
||||||
|
if before:
|
||||||
|
elem.set('pb_before', '1')
|
||||||
|
page_breaks.add(elem)
|
||||||
|
|
||||||
for i, elem in enumerate(item.data.iter()):
|
for i, elem in enumerate(item.data.iter()):
|
||||||
elem.set('pb_order', str(i))
|
elem.set('pb_order', str(i))
|
||||||
@ -136,8 +139,10 @@ class Split(object):
|
|||||||
if href in self.map:
|
if href in self.map:
|
||||||
anchor_map = self.map[href]
|
anchor_map = self.map[href]
|
||||||
nhref = anchor_map[frag if frag else None]
|
nhref = anchor_map[frag if frag else None]
|
||||||
|
nhref = self.current_item.relhref(nhref)
|
||||||
if frag:
|
if frag:
|
||||||
nhref = '#'.join(href, frag)
|
nhref = '#'.join((nhref, frag))
|
||||||
|
|
||||||
return nhref
|
return nhref
|
||||||
return url
|
return url
|
||||||
|
|
||||||
@ -153,7 +158,7 @@ class FlowSplitter(object):
|
|||||||
self.page_breaks = page_breaks
|
self.page_breaks = page_breaks
|
||||||
self.page_break_ids = page_break_ids
|
self.page_break_ids = page_break_ids
|
||||||
self.max_flow_size = max_flow_size
|
self.max_flow_size = max_flow_size
|
||||||
self.base = item.abshref(item.href)
|
self.base = item.href
|
||||||
|
|
||||||
base, ext = os.path.splitext(self.base)
|
base, ext = os.path.splitext(self.base)
|
||||||
self.base = base.replace('%', '%%')+'_split_%d'+ext
|
self.base = base.replace('%', '%%')+'_split_%d'+ext
|
||||||
@ -192,9 +197,9 @@ class FlowSplitter(object):
|
|||||||
self.trees = []
|
self.trees = []
|
||||||
tree = orig_tree
|
tree = orig_tree
|
||||||
for pattern, before in ordered_ids:
|
for pattern, before in ordered_ids:
|
||||||
self.log.debug('\t\tSplitting on page-break')
|
|
||||||
elem = pattern(tree)
|
elem = pattern(tree)
|
||||||
if elem:
|
if elem:
|
||||||
|
self.log.debug('\t\tSplitting on page-break')
|
||||||
before, after = self.do_split(tree, elem[0], before)
|
before, after = self.do_split(tree, elem[0], before)
|
||||||
self.trees.append(before)
|
self.trees.append(before)
|
||||||
tree = after
|
tree = after
|
||||||
@ -414,13 +419,14 @@ class FlowSplitter(object):
|
|||||||
elem.attrib.pop(SPLIT_ATTR, None)
|
elem.attrib.pop(SPLIT_ATTR, None)
|
||||||
elem.attrib.pop(SPLIT_POINT_ATTR, '0')
|
elem.attrib.pop(SPLIT_POINT_ATTR, '0')
|
||||||
|
|
||||||
spine_pos = self.item.spine_pos
|
spine_pos = self.item.spine_position
|
||||||
for current, tree in zip(map(reversed, (self.files, self.trees))):
|
for current, tree in zip(*map(reversed, (self.files, self.trees))):
|
||||||
for a in tree.getroot().xpath('//h:a[@href]', namespaces=NAMESPACES):
|
for a in tree.getroot().xpath('//h:a[@href]', namespaces=NAMESPACES):
|
||||||
href = a.get('href').strip()
|
href = a.get('href').strip()
|
||||||
if href.startswith('#'):
|
if href.startswith('#'):
|
||||||
anchor = href[1:]
|
anchor = href[1:]
|
||||||
file = self.anchor_map[anchor]
|
file = self.anchor_map[anchor]
|
||||||
|
file = self.item.relhref(file)
|
||||||
if file != current:
|
if file != current:
|
||||||
a.set('href', file+href)
|
a.set('href', file+href)
|
||||||
|
|
||||||
@ -430,12 +436,12 @@ class FlowSplitter(object):
|
|||||||
self.oeb.spine.insert(spine_pos, new_item, self.item.linear)
|
self.oeb.spine.insert(spine_pos, new_item, self.item.linear)
|
||||||
|
|
||||||
if self.oeb.guide:
|
if self.oeb.guide:
|
||||||
for ref in self.oeb.guide:
|
for ref in self.oeb.guide.values():
|
||||||
href, frag = urldefrag(ref.href)
|
href, frag = urldefrag(ref.href)
|
||||||
if href == self.item.href:
|
if href == self.item.href:
|
||||||
nhref = self.anchor_map[frag if frag else None]
|
nhref = self.anchor_map[frag if frag else None]
|
||||||
if frag:
|
if frag:
|
||||||
nhref = '#'.join(nhref, frag)
|
nhref = '#'.join((nhref, frag))
|
||||||
ref.href = nhref
|
ref.href = nhref
|
||||||
|
|
||||||
def fix_toc_entry(toc):
|
def fix_toc_entry(toc):
|
||||||
@ -444,7 +450,7 @@ class FlowSplitter(object):
|
|||||||
if href == self.item.href:
|
if href == self.item.href:
|
||||||
nhref = self.anchor_map[frag if frag else None]
|
nhref = self.anchor_map[frag if frag else None]
|
||||||
if frag:
|
if frag:
|
||||||
nhref = '#'.join(nhref, frag)
|
nhref = '#'.join((nhref, frag))
|
||||||
toc.href = nhref
|
toc.href = nhref
|
||||||
for x in toc:
|
for x in toc:
|
||||||
fix_toc_entry(x)
|
fix_toc_entry(x)
|
||||||
|
@ -49,7 +49,7 @@ class OEBWriter(object):
|
|||||||
|
|
||||||
def __call__(self, oeb, path):
|
def __call__(self, oeb, path):
|
||||||
"""
|
"""
|
||||||
Read the book in the :class:`OEBBook` object :param:`oeb` to a file
|
Write the book in the :class:`OEBBook` object :param:`oeb` to a folder
|
||||||
at :param:`path`.
|
at :param:`path`.
|
||||||
"""
|
"""
|
||||||
version = int(self.version[0])
|
version = int(self.version[0])
|
||||||
|
Loading…
x
Reference in New Issue
Block a user