diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 08824a3591..a56d13fd60 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -281,6 +281,7 @@ from calibre.ebooks.mobi.input import MOBIInput
from calibre.ebooks.pdf.input import PDFInput
from calibre.ebooks.txt.input import TXTInput
from calibre.ebooks.lit.input import LITInput
+from calibre.ebooks.fb2.input import FB2Input
from calibre.ebooks.html.input import HTMLInput
from calibre.ebooks.oeb.output import OEBOutput
from calibre.ebooks.txt.output import TXTOutput
@@ -288,7 +289,8 @@ from calibre.ebooks.pdf.output import PDFOutput
from calibre.customize.profiles import input_profiles, output_profiles
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDFInput, HTMLInput,
- TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput]
+ TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput,
+ FB2Input]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
x.__name__.endswith('MetadataReader')]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py
index b7336ab30a..6d5401278a 100644
--- a/src/calibre/ebooks/conversion/cli.py
+++ b/src/calibre/ebooks/conversion/cli.py
@@ -119,6 +119,24 @@ def add_pipeline_options(parser, plumber):
]
),
+ 'STRUCTURE DETECTION' : (
+ _('Control auto-detection of document structure.'),
+ [
+ 'dont_split_on_page_breaks', 'chapter', 'chapter_mark',
+ ]
+ ),
+
+ 'TABLE OF CONTENTS' : (
+ _('Control the automatic generation of a Table of Contents. By '
+ 'default, if the source file has a Table of Contents, it will '
+ 'be used in preference to the automatically generated one.'),
+ [
+ 'level1_toc', 'level2_toc', 'level3_toc',
+ 'toc_threshold', 'max_toc_links', 'no_chapters_in_toc',
+ 'use_auto_toc',
+ ]
+ ),
+
'METADATA' : (_('Options to set metadata in the output'),
plumber.metadata_option_names,
),
@@ -130,7 +148,8 @@ def add_pipeline_options(parser, plumber):
}
- group_order = ['', 'LOOK AND FEEL', 'METADATA', 'DEBUG']
+ group_order = ['', 'LOOK AND FEEL', 'STRUCTURE DETECTION',
+ 'TABLE OF CONTENTS', 'METADATA', 'DEBUG']
for group in group_order:
desc, options = groups[group]
@@ -163,6 +182,10 @@ def main(args=sys.argv):
add_pipeline_options(parser, plumber)
opts = parser.parse_args(args)[0]
+ y = lambda q : os.path.abspath(os.path.expanduser(q))
+ for x in ('read_metadata_from_opf', 'cover'):
+ if getattr(opts, x, None) is not None:
+ setattr(opts, x, y(getattr(opts, x)))
recommendations = [(n.dest, getattr(opts, n.dest),
OptionRecommendation.HIGH) \
for n in parser.options_iter()
diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py
index 1edeed8d9c..453591e433 100644
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@@ -121,6 +121,88 @@ OptionRecommendation(name='dont_split_on_page_breaks',
)
),
+OptionRecommendation(name='level1_toc',
+ recommended_value=None, level=OptionRecommendation.LOW,
+ help=_('XPath expression that specifies all tags that '
+ 'should be added to the Table of Contents at level one. If '
+ 'this is specified, it takes precedence over other forms '
+ 'of auto-detection.'
+ )
+ ),
+
+OptionRecommendation(name='level2_toc',
+ recommended_value=None, level=OptionRecommendation.LOW,
+ help=_('XPath expression that specifies all tags that should be '
+ 'added to the Table of Contents at level two. Each entry is added '
+ 'under the previous level one entry.'
+ )
+ ),
+
+OptionRecommendation(name='level3_toc',
+ recommended_value=None, level=OptionRecommendation.LOW,
+ help=_('XPath expression that specifies all tags that should be '
+ 'added to the Table of Contents at level three. Each entry '
+ 'is added under the previous level two entry.'
+ )
+ ),
+
+OptionRecommendation(name='use_auto_toc',
+ recommended_value=False, level=OptionRecommendation.LOW,
+ help=_('Normally, if the source file already has a Table of '
+ 'Contents, it is used in preference to the auto-generated one. '
+ 'With this option, the auto-generated one is always used.'
+ )
+ ),
+
+OptionRecommendation(name='no_chapters_in_toc',
+ recommended_value=False, level=OptionRecommendation.LOW,
+ help=_("Don't add auto-detected chapters to the Table of "
+ 'Contents.'
+ )
+ ),
+
+OptionRecommendation(name='toc_threshold',
+ recommended_value=6, level=OptionRecommendation.LOW,
+ help=_(
+ 'If fewer than this number of chapters is detected, then links '
+ 'are added to the Table of Contents. Default: %default')
+ ),
+
+OptionRecommendation(name='max_toc_links',
+ recommended_value=50, level=OptionRecommendation.LOW,
+ help=_('Maximum number of links to insert into the TOC. Set to 0 '
+ 'to disable. Default is: %default. Links are only added to the '
+ 'TOC if less than the threshold number of chapters were detected.'
+ )
+ ),
+
+OptionRecommendation(name='chapter',
+ recommended_value="//*[((name()='h1' or name()='h2') and "
+ "re:test(., 'chapter|book|section|part', 'i')) or @class "
+ "= 'chapter']", level=OptionRecommendation.LOW,
+ help=_('An XPath expression to detect chapter titles. The default '
+ 'is to consider <h1> or <h2> tags that contain the words '
+ '"chapter","book","section" or "part" as chapter titles as '
+ 'well as any tags that have class="chapter". The expression '
+ 'used must evaluate to a list of elements. To disable chapter '
+ 'detection, use the expression "/". See the XPath Tutorial '
+ 'in the calibre User Manual for further help on using this '
+ 'feature.'
+ )
+ ),
+
+OptionRecommendation(name='chapter_mark',
+ recommended_value='pagebreak', level=OptionRecommendation.LOW,
+ choices=['pagebreak', 'rule', 'both', 'none'],
+ help=_('Specify how to mark detected chapters. A value of '
+ '"pagebreak" will insert page breaks before chapters. '
+ 'A value of "rule" will insert a line before chapters. '
+ 'A value of "none" will disable chapter marking and a '
+ 'value of "both" will use both page breaks and lines '
+ 'to mark chapters.')
+ ),
+
+
OptionRecommendation(name='read_metadata_from_opf',
recommended_value=None, level=OptionRecommendation.LOW,
@@ -130,6 +212,7 @@ OptionRecommendation(name='read_metadata_from_opf',
'file.')
),
+
OptionRecommendation(name='title',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('Set the title.')),
@@ -237,6 +320,7 @@ OptionRecommendation(name='language',
rec = self.get_option_by_name(name)
if rec is not None and rec.level <= level:
rec.recommended_value = val
+ rec.level = level
def merge_ui_recommendations(self, recommendations):
'''
@@ -248,6 +332,7 @@ OptionRecommendation(name='language',
rec = self.get_option_by_name(name)
if rec is not None and rec.level <= level and rec.level < rec.HIGH:
rec.recommended_value = val
+ rec.level = level
def read_user_metadata(self):
'''
@@ -332,6 +417,9 @@ OptionRecommendation(name='language',
self.opts.source = self.opts.input_profile
self.opts.dest = self.opts.output_profile
+ from calibre.ebooks.oeb.transforms.structure import DetectStructure
+ DetectStructure()(self.oeb, self.opts)
+
from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
fbase = self.opts.base_font_size
if fbase == 0:
@@ -364,6 +452,8 @@ OptionRecommendation(name='language',
trimmer = ManifestTrimmer()
trimmer(self.oeb, self.opts)
+ self.oeb.toc.rationalize_play_orders()
+
self.log.info('Creating %s...'%self.output_plugin.name)
self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
self.opts, self.log)
@@ -384,4 +474,3 @@ def create_oebbook(log, path_or_stream, opts, reader=None):
reader()(oeb, path_or_stream)
return oeb
-
diff --git a/src/calibre/ebooks/epub/from_any.py b/src/calibre/ebooks/epub/from_any.py
index b3e5281525..196ed59646 100644
--- a/src/calibre/ebooks/epub/from_any.py
+++ b/src/calibre/ebooks/epub/from_any.py
@@ -15,88 +15,15 @@ from calibre.ebooks import DRMError
from calibre.ebooks.epub import config as common_config
from calibre.ebooks.epub.from_html import convert as html2epub, find_html_index
from calibre.ptempfile import TemporaryDirectory
-from calibre.ebooks.metadata import MetaInformation
-from calibre.ebooks.metadata.opf2 import OPFCreator, OPF
from calibre.utils.zipfile import ZipFile
from calibre.customize.ui import run_plugins_on_preprocess
-def lit2opf(path, tdir, opts):
- from calibre.ebooks.lit.reader import LitReader
- print 'Exploding LIT file:', path
- reader = LitReader(path)
- reader.extract_content(tdir, False)
- opf = None
- for opf in walk(tdir):
- if opf.lower().endswith('.opf'):
- break
- if not opf.endswith('.opf'):
- opf = None
- if opf is not None: # Check for url-quoted filenames
- _opf = OPF(opf, os.path.dirname(opf))
- replacements = []
- for item in _opf.itermanifest():
- href = item.get('href', '')
- path = os.path.join(os.path.dirname(opf), *(href.split('/')))
- if not os.path.exists(path) and os.path.exists(path.replace('&', '%26')):
- npath = path
- path = path.replace('&', '%26')
- replacements.append((path, npath))
- if replacements:
- print 'Fixing quoted filenames...'
- for path, npath in replacements:
- if os.path.exists(path):
- os.rename(path, npath)
- for f in walk(tdir):
- with open(f, 'r+b') as f:
- raw = f.read()
- for path, npath in replacements:
- raw = raw.replace(os.path.basename(path), os.path.basename(npath))
- f.seek(0)
- f.truncate()
- f.write(raw)
- return opf
-def mobi2opf(path, tdir, opts):
- from calibre.ebooks.mobi.reader import MobiReader
- print 'Exploding MOBI file:', path.encode('utf-8') if isinstance(path, unicode) else path
- reader = MobiReader(path)
- reader.extract_content(tdir)
- files = list(walk(tdir))
- opts.encoding = 'utf-8'
- for f in files:
- if f.lower().endswith('.opf'):
- return f
- html_pat = re.compile(r'\.(x){0,1}htm(l){0,1}', re.IGNORECASE)
- hf = [f for f in files if html_pat.match(os.path.splitext(f)[1]) is not None]
- mi = MetaInformation(os.path.splitext(os.path.basename(path))[0], [_('Unknown')])
- opf = OPFCreator(tdir, mi)
- opf.create_manifest([(hf[0], None)])
- opf.create_spine([hf[0]])
- ans = os.path.join(tdir, 'metadata.opf')
- opf.render(open(ans, 'wb'))
- return ans
-
-def fb22opf(path, tdir, opts):
- from calibre.ebooks.lrf.fb2.convert_from import to_html
- print 'Converting FB2 to HTML...'
- return to_html(path, tdir)
-
def rtf2opf(path, tdir, opts):
from calibre.ebooks.lrf.rtf.convert_from import generate_html
generate_html(path, tdir)
return os.path.join(tdir, 'metadata.opf')
-def txt2opf(path, tdir, opts):
- from calibre.ebooks.lrf.txt.convert_from import generate_html
- generate_html(path, opts.encoding, tdir)
- return os.path.join(tdir, 'metadata.opf')
-
-def pdf2opf(path, tdir, opts):
- from calibre.ebooks.lrf.pdf.convert_from import generate_html
- generate_html(path, tdir)
- opts.dont_split_on_page_breaks = True
- return os.path.join(tdir, 'metadata.opf')
-
def epub2opf(path, tdir, opts):
zf = ZipFile(path)
zf.extractall(tdir)
@@ -110,35 +37,23 @@ def epub2opf(path, tdir, opts):
if opf and os.path.exists(encfile):
if not process_encryption(encfile, opf):
raise DRMError(os.path.basename(path))
-
+
if opf is None:
raise ValueError('%s is not a valid EPUB file'%path)
return opf
-
+
def odt2epub(path, tdir, opts):
from calibre.ebooks.odt.to_oeb import Extract
opts.encoding = 'utf-8'
return Extract()(path, tdir)
-MAP = {
- 'lit' : lit2opf,
- 'mobi' : mobi2opf,
- 'prc' : mobi2opf,
- 'azw' : mobi2opf,
- 'fb2' : fb22opf,
- 'rtf' : rtf2opf,
- 'txt' : txt2opf,
- 'pdf' : pdf2opf,
- 'epub' : epub2opf,
- 'odt' : odt2epub,
- }
-SOURCE_FORMATS = ['lit', 'mobi', 'prc', 'azw', 'fb2', 'odt', 'rtf',
+SOURCE_FORMATS = ['lit', 'mobi', 'prc', 'azw', 'fb2', 'odt', 'rtf',
'txt', 'pdf', 'rar', 'zip', 'oebzip', 'htm', 'html', 'epub']
def unarchive(path, tdir):
extract(path, tdir)
files = list(walk(tdir))
-
+
for ext in ['opf'] + list(MAP.keys()):
for f in files:
if f.lower().endswith('.'+ext):
@@ -147,32 +62,32 @@ def unarchive(path, tdir):
return f, ext
return find_html_index(files)
-def any2epub(opts, path, notification=None, create_epub=True,
+def any2epub(opts, path, notification=None, create_epub=True,
oeb_cover=False, extract_to=None):
path = run_plugins_on_preprocess(path)
ext = os.path.splitext(path)[1]
if not ext:
raise ValueError('Unknown file type: '+path)
ext = ext.lower()[1:]
-
+
if opts.output is None:
opts.output = os.path.splitext(os.path.basename(path))[0]+'.epub'
-
+
with nested(TemporaryDirectory('_any2epub1'), TemporaryDirectory('_any2epub2')) as (tdir1, tdir2):
if ext in ['rar', 'zip', 'oebzip']:
path, ext = unarchive(path, tdir1)
print 'Found %s file in archive'%(ext.upper())
-
+
if ext in MAP.keys():
path = MAP[ext](path, tdir2, opts)
ext = 'opf'
-
-
+
+
if re.match(r'((x){0,1}htm(l){0,1})|opf', ext) is None:
raise ValueError('Conversion from %s is not supported'%ext.upper())
-
+
print 'Creating EPUB file...'
- html2epub(path, opts, notification=notification,
+ html2epub(path, opts, notification=notification,
create_epub=create_epub, oeb_cover=oeb_cover,
extract_to=extract_to)
diff --git a/src/calibre/ebooks/lrf/fb2/__init__.py b/src/calibre/ebooks/fb2/__init__.py
similarity index 100%
rename from src/calibre/ebooks/lrf/fb2/__init__.py
rename to src/calibre/ebooks/fb2/__init__.py
diff --git a/src/calibre/ebooks/lrf/fb2/fb2.xsl b/src/calibre/ebooks/fb2/fb2.xsl
similarity index 100%
rename from src/calibre/ebooks/lrf/fb2/fb2.xsl
rename to src/calibre/ebooks/fb2/fb2.xsl
diff --git a/src/calibre/ebooks/fb2/input.py b/src/calibre/ebooks/fb2/input.py
new file mode 100644
index 0000000000..d96758a4bd
--- /dev/null
+++ b/src/calibre/ebooks/fb2/input.py
@@ -0,0 +1,102 @@
+from __future__ import with_statement
+__license__ = 'GPL v3'
+__copyright__ = '2008, Anatoly Shipitsin '
+"""
+Input plugin that converts FictionBook 2 (.fb2) files to XHTML plus an OPF.
+"""
+import os
+from base64 import b64decode
+from lxml import etree
+
+from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
+from calibre import guess_type
+
+FB2NS = 'http://www.gribuser.ru/xml/fictionbook/2.0'
+
+class FB2Input(InputFormatPlugin):
+    '''
+    Conversion input plugin for FictionBook 2 files. The converted book is
+    written to the current working directory.
+    '''
+
+    name = 'FB2 Input'
+    author = 'Anatoly Shipitsin'
+    description = 'Convert FB2 files to HTML'
+    file_types = set(['fb2'])
+
+    # Build the automatic TOC from the h1/h2/h3 headings of the output.
+    recommendations = set([
+        ('level1_toc', '//h:h1', OptionRecommendation.MED),
+        ('level2_toc', '//h:h2', OptionRecommendation.MED),
+        ('level3_toc', '//h:h3', OptionRecommendation.MED),
+        ])
+
+    def convert(self, stream, options, file_ext, log,
+                accelerators):
+        '''
+        Transform the FB2 XML in `stream` to XHTML with the bundled XSL
+        stylesheet and describe the result in an OPF.
+
+        ``index.xhtml``, the embedded binaries and ``metadata.opf`` are all
+        written to the current working directory.
+
+        :param stream: Seekable file-like object holding the FB2 document.
+        :param log: Logger that receives progress messages.
+        :return: Absolute path to the generated ``metadata.opf``.
+        '''
+        from calibre.resources import fb2_xsl
+        from calibre.ebooks.metadata.opf2 import OPFCreator
+        from calibre.ebooks.metadata.meta import get_metadata
+        from calibre.ebooks.oeb.base import XLINK_NS
+        NAMESPACES = {'f':FB2NS, 'l':XLINK_NS}
+
+        log.debug('Parsing XML...')
+        parser = etree.XMLParser(recover=True, no_network=True)
+        doc = etree.parse(stream, parser)
+        self.extract_embedded_content(doc)
+        log.debug('Converting XML to HTML...')
+        styledoc = etree.fromstring(fb2_xsl)
+
+        transform = etree.XSLT(styledoc)
+        result = transform(doc)
+        with open('index.xhtml', 'wb') as out:
+            out.write(transform.tostring(result))
+        # The parse above consumed the stream; rewind before reading metadata.
+        stream.seek(0)
+        mi = get_metadata(stream, 'fb2')
+        if not mi.title:
+            mi.title = _('Unknown')
+        if not mi.authors:
+            mi.authors = [_('Unknown')]
+        opf = OPFCreator(os.getcwdu(), mi)
+        entries = [(f, guess_type(f)[0]) for f in os.listdir('.')]
+        opf.create_manifest(entries)
+        opf.create_spine(['index.xhtml'])
+
+        for img in doc.xpath('//f:coverpage/f:image', namespaces=NAMESPACES):
+            href = img.get('{%s}href'%XLINK_NS, img.get('href', None))
+            if href is not None:
+                if href.startswith('#'):
+                    href = href[1:]
+                opf.guide.set_cover(os.path.abspath(href))
+
+        with open('metadata.opf', 'wb') as out:
+            opf.render(out)
+        return os.path.join(os.getcwd(), 'metadata.opf')
+
+    def extract_embedded_content(self, doc):
+        '''
+        Base64 decode each element matched by ``./*`` whose tag contains
+        ``binary`` and that has an ``id``, writing it to a file named after
+        that id in the current working directory.
+        '''
+        for elem in doc.xpath('./*'):
+            if 'binary' not in elem.tag or 'id' not in elem.attrib:
+                continue
+            # The id comes from the book itself: keep only its last path
+            # component so a crafted id cannot write outside this directory.
+            fname = os.path.basename(elem.attrib['id'])
+            if fname in ('', os.curdir, os.pardir) or not elem.text:
+                continue
+            with open(fname, 'wb') as out:
+                out.write(b64decode(elem.text.strip()))
diff --git a/src/calibre/ebooks/lrf/fb2/convert_from.py b/src/calibre/ebooks/lrf/fb2/convert_from.py
deleted file mode 100644
index 24562e708c..0000000000
--- a/src/calibre/ebooks/lrf/fb2/convert_from.py
+++ /dev/null
@@ -1,125 +0,0 @@
-from __future__ import with_statement
-__license__ = 'GPL v3'
-__copyright__ = '2008, Anatoly Shipitsin '
-"""
-Convert .fb2 files to .lrf
-"""
-import os, sys, shutil, logging
-from base64 import b64decode
-from lxml import etree
-
-from calibre.ebooks.lrf import option_parser as lrf_option_parser
-from calibre.ebooks.metadata.meta import get_metadata
-from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
-from calibre import setup_cli_handlers
-from calibre.resources import fb2_xsl
-from calibre.ptempfile import PersistentTemporaryDirectory
-from calibre.ebooks.metadata.opf import OPFCreator
-from calibre.ebooks.metadata import MetaInformation
-
-
-def option_parser():
- parser = lrf_option_parser(
-_('''%prog [options] mybook.fb2
-
-
-%prog converts mybook.fb2 to mybook.lrf'''))
- parser.add_option('--debug-html-generation', action='store_true', default=False,
- dest='debug_html_generation', help=_('Print generated HTML to stdout and quit.'))
- parser.add_option('--keep-intermediate-files', action='store_true', default=False,
- help=_('Keep generated HTML files after completing conversion to LRF.'))
- return parser
-
-def extract_embedded_content(doc):
- for elem in doc.xpath('./*'):
- if 'binary' in elem.tag and elem.attrib.has_key('id'):
- fname = elem.attrib['id']
- data = b64decode(elem.text.strip())
- open(fname, 'wb').write(data)
-
-def to_html(fb2file, tdir):
- fb2file = os.path.abspath(fb2file)
- cwd = os.getcwd()
- try:
- os.chdir(tdir)
- print 'Parsing XML...'
- parser = etree.XMLParser(recover=True, no_network=True)
- doc = etree.parse(fb2file, parser)
- extract_embedded_content(doc)
- print 'Converting XML to HTML...'
- styledoc = etree.fromstring(fb2_xsl)
-
- transform = etree.XSLT(styledoc)
- result = transform(doc)
- open('index.html', 'wb').write(transform.tostring(result))
- try:
- mi = get_metadata(open(fb2file, 'rb'), 'fb2')
- except:
- mi = MetaInformation(None, None)
- if not mi.title:
- mi.title = os.path.splitext(os.path.basename(fb2file))[0]
- if not mi.authors:
- mi.authors = [_('Unknown')]
- opf = OPFCreator(tdir, mi)
- opf.create_manifest([('index.html', None)])
- opf.create_spine(['index.html'])
- opf.render(open('metadata.opf', 'wb'))
- return os.path.join(tdir, 'metadata.opf')
- finally:
- os.chdir(cwd)
-
-
-def generate_html(fb2file, encoding, logger):
- tdir = PersistentTemporaryDirectory('_fb22lrf')
- to_html(fb2file, tdir)
- return os.path.join(tdir, 'index.html')
-
-def process_file(path, options, logger=None):
- if logger is None:
- level = logging.DEBUG if options.verbose else logging.INFO
- logger = logging.getLogger('fb22lrf')
- setup_cli_handlers(logger, level)
- fb2 = os.path.abspath(os.path.expanduser(path))
- f = open(fb2, 'rb')
- mi = get_metadata(f, 'fb2')
- f.close()
- htmlfile = generate_html(fb2, options.encoding, logger)
- tdir = os.path.dirname(htmlfile)
- cwd = os.getcwdu()
- try:
- if not options.output:
- ext = '.lrs' if options.lrs else '.lrf'
- options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
- options.output = os.path.abspath(os.path.expanduser(options.output))
- if not mi.title:
- mi.title = os.path.splitext(os.path.basename(fb2))[0]
- if (not options.title or options.title == _('Unknown')):
- options.title = mi.title
- if (not options.author or options.author == _('Unknown')) and mi.authors:
- options.author = mi.authors.pop()
- if (not options.category or options.category == _('Unknown')) and mi.category:
- options.category = mi.category
- if (not options.freetext or options.freetext == _('Unknown')) and mi.comments:
- options.freetext = mi.comments
- os.chdir(tdir)
- html_process_file(htmlfile, options, logger)
- finally:
- os.chdir(cwd)
- if getattr(options, 'keep_intermediate_files', False):
- logger.debug('Intermediate files in '+ tdir)
- else:
- shutil.rmtree(tdir)
-
-def main(args=sys.argv, logger=None):
- parser = option_parser()
- options, args = parser.parse_args(args)
- if len(args) != 2:
- parser.print_help()
- print
- print 'No fb2 file specified'
- return 1
- process_file(args[1], options, logger)
- return 0
-
-if __name__ == '__main__':
- sys.exit(main())
diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py
index dda36a7500..85510e2127 100644
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -41,10 +41,12 @@ NCX_NS = 'http://www.daisy.org/z3986/2005/ncx/'
SVG_NS = 'http://www.w3.org/2000/svg'
XLINK_NS = 'http://www.w3.org/1999/xlink'
CALIBRE_NS = 'http://calibre.kovidgoyal.net/2009/metadata'
+RE_NS = 'http://exslt.org/regular-expressions'
+
XPNSMAP = {'h' : XHTML_NS, 'o1' : OPF1_NS, 'o2' : OPF2_NS,
'd09': DC09_NS, 'd10': DC10_NS, 'd11': DC11_NS,
'xsi': XSI_NS, 'dt' : DCTERMS_NS, 'ncx': NCX_NS,
- 'svg': SVG_NS, 'xl' : XLINK_NS}
+ 'svg': SVG_NS, 'xl' : XLINK_NS, 're': RE_NS}
OPF1_NSMAP = {'dc': DC11_NS, 'oebpackage': OPF1_NS}
OPF2_NSMAP = {'opf': OPF2_NS, 'dc': DC11_NS, 'dcterms': DCTERMS_NS,
'xsi': XSI_NS, 'calibre': CALIBRE_NS}
@@ -1256,16 +1258,21 @@ class TOC(object):
:attr:`klass`: Optional semantic class referenced by this node.
:attr:`id`: Option unique identifier for this node.
"""
- def __init__(self, title=None, href=None, klass=None, id=None):
+ def __init__(self, title=None, href=None, klass=None, id=None,
+ play_order=None):
self.title = title
self.href = urlnormalize(href) if href else href
self.klass = klass
self.id = id
self.nodes = []
+ self.play_order = 0
+ if play_order is None:
+ play_order = self.next_play_order()
+ self.play_order = play_order
- def add(self, title, href, klass=None, id=None):
+ def add(self, title, href, klass=None, id=None, play_order=0):
"""Create and return a new sub-node of this node."""
- node = TOC(title, href, klass, id)
+ node = TOC(title, href, klass, id, play_order)
self.nodes.append(node)
return node
@@ -1276,6 +1283,18 @@ class TOC(object):
for node in child.iter():
yield node
+ def count(self):
+ return len(list(self.iter())) - 1
+
+ def next_play_order(self):
+ return max([x.play_order for x in self.iter()])+1
+
+ def has_href(self, href):
+ for x in self.iter():
+ if x.href == href:
+ return True
+ return False
+
def iterdescendants(self):
"""Iterate over all descendant nodes in depth-first order."""
for child in self.nodes:
@@ -1309,6 +1328,10 @@ class TOC(object):
except ValueError:
return 1
+ def __str__(self):
+ return 'TOC: %s --> %s'%(self.title, self.href)
+
+
def to_opf1(self, tour):
for node in self.nodes:
element(tour, 'site', attrib={
@@ -1319,7 +1342,7 @@ class TOC(object):
def to_ncx(self, parent):
for node in self.nodes:
id = node.id or unicode(uuid.uuid4())
- attrib = {'id': id, 'playOrder': '0'}
+ attrib = {'id': id, 'playOrder': str(node.play_order)}
if node.klass:
attrib['class'] = node.klass
point = element(parent, NCX('navPoint'), attrib=attrib)
@@ -1329,6 +1352,34 @@ class TOC(object):
node.to_ncx(point)
return parent
+ def rationalize_play_orders(self):
+ '''
+ Ensure that all nodes with the same play_order have the same href and
+ with different play_orders have different hrefs.
+ '''
+ def po_node(n):
+ for x in self.iter():
+ if x is n:
+ return
+ if x.play_order == n.play_order:
+ return x
+
+ def href_node(n):
+ for x in self.iter():
+ if x is n:
+ return
+ if x.href == n.href:
+ return x
+
+ for x in self.iter():
+ y = po_node(x)
+ if y is not None:
+ if x.href != y.href:
+ x.play_order = getattr(href_node(x), 'play_order',
+ self.next_play_order())
+ y = href_node(x)
+ if y is not None:
+ x.play_order = y.play_order
class PageList(object):
"""Collection of named "pages" to mapped positions within an OEB data model
diff --git a/src/calibre/ebooks/oeb/iterator.py b/src/calibre/ebooks/oeb/iterator.py
index 81e1f89029..ab3e90083d 100644
--- a/src/calibre/ebooks/oeb/iterator.py
+++ b/src/calibre/ebooks/oeb/iterator.py
@@ -118,6 +118,7 @@ class EbookIterator(object):
print 'Loaded embedded font:', repr(family)
def __enter__(self):
+ self.delete_on_exit = []
self._tdir = TemporaryDirectory('_ebook_iter')
self.base = self._tdir.__enter__()
from calibre.ebooks.conversion.plumber import Plumber
@@ -137,9 +138,11 @@ class EbookIterator(object):
cover = self.opf.cover
if self.ebook_ext in ('lit', 'mobi', 'prc', 'opf') and cover:
- cfile = os.path.join(os.path.dirname(self.spine[0]), 'calibre_ei_cover.html')
+ cfile = os.path.join(os.path.dirname(self.spine[0]),
+ 'calibre_iterator_cover.html')
open(cfile, 'wb').write(TITLEPAGE%cover)
self.spine[0:0] = [SpineItem(cfile)]
+ self.delete_on_exit.append(cfile)
if self.opf.path_to_html_toc is not None and \
self.opf.path_to_html_toc not in self.spine:
@@ -221,3 +224,6 @@ class EbookIterator(object):
def __exit__(self, *args):
self._tdir.__exit__(*args)
+ for x in self.delete_on_exit:
+ if os.path.exists(x):
+ os.remove(x)
diff --git a/src/calibre/ebooks/oeb/reader.py b/src/calibre/ebooks/oeb/reader.py
index 6f0ff44bc9..02b3b92b01 100644
--- a/src/calibre/ebooks/oeb/reader.py
+++ b/src/calibre/ebooks/oeb/reader.py
@@ -343,7 +343,8 @@ class OEBReader(object):
continue
id = child.get('id')
klass = child.get('class')
- node = toc.add(title, href, id=id, klass=klass)
+ po = int(child.get('playOrder', self.oeb.toc.next_play_order()))
+ node = toc.add(title, href, id=id, klass=klass, play_order=po)
self._toc_from_navpoint(item, node, child)
def _toc_from_ncx(self, item):
diff --git a/src/calibre/ebooks/oeb/transforms/split.py b/src/calibre/ebooks/oeb/transforms/split.py
index 33ab14b73d..bc7e4e195d 100644
--- a/src/calibre/ebooks/oeb/transforms/split.py
+++ b/src/calibre/ebooks/oeb/transforms/split.py
@@ -15,12 +15,10 @@ from lxml.etree import XPath as _XPath
from lxml import etree
from lxml.cssselect import CSSSelector
-from calibre.ebooks.oeb.base import OEB_STYLES, XPNSMAP, urldefrag, \
- rewrite_links
+from calibre.ebooks.oeb.base import OEB_STYLES, XPNSMAP as NAMESPACES, \
+ urldefrag, rewrite_links
from calibre.ebooks.epub import tostring, rules
-NAMESPACES = dict(XPNSMAP)
-NAMESPACES['re'] = 'http://exslt.org/regular-expressions'
XPath = functools.partial(_XPath, namespaces=NAMESPACES)
diff --git a/src/calibre/ebooks/oeb/transforms/structure.py b/src/calibre/ebooks/oeb/transforms/structure.py
new file mode 100644
index 0000000000..0f1502ef03
--- /dev/null
+++ b/src/calibre/ebooks/oeb/transforms/structure.py
@@ -0,0 +1,195 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import with_statement
+
+__license__ = 'GPL v3'
+__copyright__ = '2009, Kovid Goyal '
+__docformat__ = 'restructuredtext en'
+
+from lxml import etree
+from urlparse import urlparse
+
+from calibre.ebooks.oeb.base import XPNSMAP, TOC
+XPath = lambda x: etree.XPath(x, namespaces=XPNSMAP)
+# Qualify a tag name with the namespace bound to the 'h' prefix, so that
+# elements inserted into a document live in the same namespace as its tags.
+XHTML = lambda x: '{%s}%s' % (XPNSMAP['h'], x)
+
+class DetectStructure(object):
+    '''
+    Transform that marks detected chapters and, when requested or when the
+    existing Table of Contents was auto generated, builds a new one.
+    '''
+
+    def __call__(self, oeb, opts):
+        '''
+        Detect chapters in `oeb` and regenerate its TOC if appropriate.
+
+        A new TOC is built when ``oeb.auto_generated_toc`` or
+        ``opts.use_auto_toc`` is true, trying the level based XPath options
+        first, then detected chapters, then links. The original TOC is put
+        back if the result has fewer than two entries while the original had
+        more than two.
+        '''
+        self.log = oeb.log
+        self.oeb = oeb
+        self.opts = opts
+        self.log('Detecting structure...')
+
+        self.detect_chapters()
+        if self.oeb.auto_generated_toc or opts.use_auto_toc:
+            orig_toc = self.oeb.toc
+            self.oeb.toc = TOC()
+            self.create_level_based_toc()
+            if self.oeb.toc.count() < 1:
+                if not opts.no_chapters_in_toc and self.detected_chapters:
+                    self.create_toc_from_chapters()
+                if self.oeb.toc.count() < opts.toc_threshold:
+                    self.create_toc_from_links()
+            if self.oeb.toc.count() < 2 and orig_toc.count() > 2:
+                self.oeb.toc = orig_toc
+            else:
+                self.oeb.auto_generated_toc = True
+                self.log('Auto generated TOC with %d entries.' %
+                        self.oeb.toc.count())
+
+    def detect_chapters(self):
+        '''
+        Collect every element matching ``opts.chapter`` into
+        ``self.detected_chapters`` and insert the marker selected by
+        ``opts.chapter_mark`` before each of them.
+        '''
+        self.detected_chapters = []
+        if not self.opts.chapter:
+            return
+        chapter_xpath = XPath(self.opts.chapter)
+        for item in self.oeb.spine:
+            for x in chapter_xpath(item.data):
+                self.detected_chapters.append((item, x))
+
+        chapter_mark = self.opts.chapter_mark
+        page_break_before = 'display: block; page-break-before: always'
+        page_break_after = 'display: block; page-break-after: always'
+        for item, elem in self.detected_chapters:
+            text = u' '.join([t.strip() for t in elem.xpath('descendant::text()')])
+            self.log('\tDetected chapter:', text[:50])
+            if chapter_mark == 'none':
+                continue
+            elif chapter_mark == 'rule':
+                mark = etree.Element(XHTML('hr'))
+            elif chapter_mark == 'pagebreak':
+                mark = etree.Element(XHTML('div'), style=page_break_after)
+            else: # chapter_mark == 'both':
+                mark = etree.Element(XHTML('hr'), style=page_break_before)
+            elem.addprevious(mark)
+
+    def create_level_based_toc(self):
+        '''Build the TOC from the level1/2/3 XPath options, if level1 is set.'''
+        if self.opts.level1_toc is None:
+            return
+        for item in self.oeb.spine:
+            self.add_leveled_toc_items(item)
+
+    def create_toc_from_chapters(self):
+        '''Add one top level TOC entry per detected chapter.'''
+        counter = self.oeb.toc.next_play_order()
+        for item, elem in self.detected_chapters:
+            text, href = self.elem_to_link(item, elem, counter)
+            self.oeb.toc.add(text, href, play_order=counter)
+            counter += 1
+
+    def create_toc_from_links(self):
+        '''
+        Add a TOC entry for each local link in the spine whose target and
+        text are not in the TOC yet, stopping once ``opts.max_toc_links``
+        entries have been added (a value of 0 means no limit).
+        '''
+        limit = getattr(self.opts, 'max_toc_links', 0)
+        num = 0
+        for item in self.oeb.spine:
+            # Use the namespace aware XPath helper: the h prefix is undefined
+            # for a bare item.data.xpath() call.
+            for a in XPath('//h:a[@href]')(item.data):
+                href = a.get('href')
+                purl = urlparse(href)
+                if purl[0] and purl[0] != 'file':
+                    continue
+                href, frag = purl.path, purl.fragment
+                href = item.abshref(href)
+                if frag:
+                    href = '#'.join((href, frag))
+                if self.oeb.toc.has_href(href):
+                    continue
+                text = u' '.join([t.strip() for t in \
+                        a.xpath('descendant::text()')])
+                text = text[:100].strip()
+                # NOTE(review): has_text is not among the TOC methods shown
+                # in this patch; confirm that TOC provides it.
+                if self.oeb.toc.has_text(text):
+                    continue
+                self.oeb.toc.add(text, href,
+                    play_order=self.oeb.toc.next_play_order())
+                num += 1
+                if limit and num >= limit:
+                    return
+
+    def elem_to_link(self, item, elem, counter):
+        '''
+        Return ``(text, href)`` for `elem` inside spine `item`.
+
+        The text is the element's joined descendant text cut to 100
+        characters. The element is given the id ``calibre_toc_<counter>``
+        if it has none, and that id becomes the fragment of the href.
+        '''
+        text = u' '.join([t.strip() for t in elem.xpath('descendant::text()')])
+        text = text[:100].strip()
+        id = elem.get('id', 'calibre_toc_%d'%counter)
+        elem.set('id', id)
+        href = '#'.join((item.href, id))
+        return text, href
+
+    def add_leveled_toc_items(self, item):
+        '''
+        Add TOC entries for the elements of spine `item` that match the
+        level1/2/3 XPath options. Level two entries nest under the preceding
+        level one entry, level three entries under the preceding level two.
+        '''
+        counter = 1
+        added = {}
+        for elem in XPath(self.opts.level1_toc)(item.data):
+            text, _href = self.elem_to_link(item, elem, counter)
+            counter += 1
+            if text:
+                added[elem] = self.oeb.toc.add(text, _href,
+                    play_order=self.oeb.toc.next_play_order())
+        if not added or self.opts.level2_toc is None:
+            return
+        added2, counter = self._add_sublevel(item, self.opts.level2_toc,
+                added, counter)
+        if self.opts.level3_toc is not None:
+            self._add_sublevel(item, self.opts.level3_toc, added2, counter)
+
+    def _add_sublevel(self, item, expr, parents, counter):
+        '''
+        Add an entry for each element of `item` matching `expr` beneath the
+        closest preceding element (in document order) found in `parents`,
+        a map of element to TOC node. Returns the map for the new entries
+        and the updated id counter.
+        '''
+        added = {}
+        for elem in XPath(expr)(item.data):
+            parent = None
+            # Walk with a separate name: rebinding `item` here would lose the
+            # spine item that supplies both .data and .href.
+            for candidate in item.data.iterdescendants():
+                if candidate in parents:
+                    parent = parents[candidate]
+                elif candidate == elem:
+                    if parent is not None:
+                        text, _href = self.elem_to_link(item, elem, counter)
+                        counter += 1
+                        if text:
+                            added[elem] = parent.add(text, _href,
+                                play_order=self.oeb.toc.next_play_order())
+                    break
+        return added, counter
diff --git a/src/calibre/linux.py b/src/calibre/linux.py
index ee51370b61..2d13ea2730 100644
--- a/src/calibre/linux.py
+++ b/src/calibre/linux.py
@@ -27,10 +27,6 @@ entry_points = {
'lrs2lrf = calibre.ebooks.lrf.lrs.convert_from:main',
'isbndb = calibre.ebooks.metadata.isbndb:main',
'librarything = calibre.ebooks.metadata.library_thing:main',
- 'comic2lrf = calibre.ebooks.lrf.comic.convert_from:main',
- 'comic2epub = calibre.ebooks.epub.from_comic:main',
- 'comic2mobi = calibre.ebooks.mobi.from_comic:main',
- 'comic2pdf = calibre.ebooks.pdf.from_comic:main',
'calibre-debug = calibre.debug:main',
'calibredb = calibre.library.cli:main',
'calibre-fontconfig = calibre.utils.fontconfig:main',
@@ -151,8 +147,6 @@ def setup_completion(fatal_errors):
from calibre.ebooks.lrf.pdf.reflow import option_parser as pdfhtmlop
from calibre.web.feeds.main import option_parser as feeds2disk
from calibre.web.feeds.recipes import titles as feed_titles
- from calibre.ebooks.lrf.comic.convert_from import option_parser as comicop
- from calibre.ebooks.epub.from_comic import option_parser as comic2epub
from calibre.ebooks.metadata.fetch import option_parser as fem_op
from calibre.gui2.main import option_parser as guiop
from calibre.utils.smtp import option_parser as smtp_op
@@ -181,10 +175,6 @@ def setup_completion(fatal_errors):
f.write(opts_and_exts('ebook-meta', metaop, list(meta_filetypes())))
f.write(opts_and_exts('lrfviewer', lrfviewerop, ['lrf']))
f.write(opts_and_exts('pdfrelow', pdfhtmlop, ['pdf']))
- f.write(opts_and_exts('comic2lrf', comicop, ['cbz', 'cbr']))
- f.write(opts_and_exts('comic2epub', comic2epub, ['cbz', 'cbr']))
- f.write(opts_and_exts('comic2mobi', comic2epub, ['cbz', 'cbr']))
- f.write(opts_and_exts('comic2pdf', comic2epub, ['cbz', 'cbr']))
f.write(opts_and_words('feeds2disk', feeds2disk, feed_titles))
f.write(opts_and_words('fetch-ebook-metadata', fem_op, []))
f.write(opts_and_words('calibre-smtp', smtp_op, []))
diff --git a/upload.py b/upload.py
index 6bc90aada2..a29e5b097c 100644
--- a/upload.py
+++ b/upload.py
@@ -139,7 +139,7 @@ class resources(OptionlessCommand):
RESOURCES = dict(
opf_template = 'ebooks/metadata/opf.xml',
ncx_template = 'ebooks/metadata/ncx.xml',
- fb2_xsl = 'ebooks/lrf/fb2/fb2.xsl',
+ fb2_xsl = 'ebooks/fb2/fb2.xsl',
metadata_sqlite = 'library/metadata_sqlite.sql',
jquery = 'gui2/viewer/jquery.js',
jquery_scrollTo = 'gui2/viewer/jquery_scrollTo.js',