Ported structure detection code and added plugin for FB2 input.

This commit is contained in:
Kovid Goyal 2009-04-19 14:44:37 -07:00
parent 02cfaac014
commit 1770f7bf74
15 changed files with 422 additions and 247 deletions

View File

@ -281,6 +281,7 @@ from calibre.ebooks.mobi.input import MOBIInput
from calibre.ebooks.pdf.input import PDFInput
from calibre.ebooks.txt.input import TXTInput
from calibre.ebooks.lit.input import LITInput
from calibre.ebooks.fb2.input import FB2Input
from calibre.ebooks.html.input import HTMLInput
from calibre.ebooks.oeb.output import OEBOutput
from calibre.ebooks.txt.output import TXTOutput
@ -288,7 +289,8 @@ from calibre.ebooks.pdf.output import PDFOutput
from calibre.customize.profiles import input_profiles, output_profiles
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDFInput, HTMLInput,
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput]
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput,
FB2Input]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
x.__name__.endswith('MetadataReader')]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \

View File

@ -119,6 +119,24 @@ def add_pipeline_options(parser, plumber):
]
),
'STRUCTURE DETECTION' : (
_('Control auto-detection of document structure.'),
[
'dont_split_on_page_breaks', 'chapter', 'chapter_mark',
]
),
'TABLE OF CONTENTS' : (
_('Control the automatic generation of a Table of Contents. By '
'default, if the source file has a Table of Contents, it will '
'be used in preference to the automatically generated one.'),
[
'level1_toc', 'level2_toc', 'level3_toc',
'toc_threshold', 'max_toc_links', 'no_chapters_in_toc',
'use_auto_toc',
]
),
'METADATA' : (_('Options to set metadata in the output'),
plumber.metadata_option_names,
),
@ -130,7 +148,8 @@ def add_pipeline_options(parser, plumber):
}
group_order = ['', 'LOOK AND FEEL', 'METADATA', 'DEBUG']
group_order = ['', 'LOOK AND FEEL', 'STRUCTURE DETECTION',
'TABLE OF CONTENTS', 'METADATA', 'DEBUG']
for group in group_order:
desc, options = groups[group]
@ -163,6 +182,10 @@ def main(args=sys.argv):
add_pipeline_options(parser, plumber)
opts = parser.parse_args(args)[0]
y = lambda q : os.path.abspath(os.path.expanduser(q))
for x in ('read_metadata_from_opf', 'cover'):
if getattr(opts, x, None) is not None:
setattr(opts, x, y(getattr(opts, x)))
recommendations = [(n.dest, getattr(opts, n.dest),
OptionRecommendation.HIGH) \
for n in parser.options_iter()

View File

@ -121,6 +121,88 @@ OptionRecommendation(name='dont_split_on_page_breaks',
)
),
OptionRecommendation(name='level1_toc',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('XPath expression that specifies all tags that '
'should be added to the Table of Contents at level one. If '
'this is specified, it takes precedence over other forms '
'of auto-detection.'
)
),
OptionRecommendation(name='level2_toc',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('XPath expression that specifies all tags that should be '
'added to the Table of Contents at level two. Each entry is added '
'under the previous level one entry.'
)
),
OptionRecommendation(name='level3_toc',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('XPath expression that specifies all tags that should be '
'added to the Table of Contents at level three. Each entry '
'is added under the previous level two entry.'
)
),
OptionRecommendation(name='use_auto_toc',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Normally, if the source file already has a Table of '
'Contents, it is used in preference to the auto-generated one. '
'With this option, the auto-generated one is always used.'
)
),
OptionRecommendation(name='no_chapters_in_toc',
recommended_value=False, level=OptionRecommendation.LOW,
help=_("Don't add auto-detected chapters to the Table of "
'Contents.'
)
),
OptionRecommendation(name='toc_threshold',
recommended_value=6, level=OptionRecommendation.LOW,
help=_(
'If fewer than this number of chapters is detected, then links '
'are added to the Table of Contents. Default: %default')
),
OptionRecommendation(name='max_toc_links',
recommended_value=50, level=OptionRecommendation.LOW,
help=_('Maximum number of links to insert into the TOC. Set to 0 '
'to disable. Default is: %default. Links are only added to the '
'TOC if less than the threshold number of chapters were detected.'
)
),
OptionRecommendation(name='chapter',
recommended_value="//*[((name()='h1' or name()='h2') and "
"re:test(., 'chapter|book|section|part', 'i')) or @class "
"= 'chapter']", level=OptionRecommendation.LOW,
help=_('An XPath expression to detect chapter titles. The default '
'is to consider <h1> or <h2> tags that contain the words '
'"chapter","book","section" or "part" as chapter titles as '
'well as any tags that have class="chapter". The expression '
'used must evaluate to a list of elements. To disable chapter '
'detection, use the expression "/". See the XPath Tutorial '
'in the calibre User Manual for further help on using this '
'feature.'
)
),
OptionRecommendation(name='chapter_mark',
recommended_value='pagebreak', level=OptionRecommendation.LOW,
choices=['pagebreak', 'rule', 'both', 'none'],
help=_('Specify how to mark detected chapters. A value of '
'"pagebreak" will insert page breaks before chapters. '
'A value of "rule" will insert a line before chapters. '
'A value of "none" will disable chapter marking and a '
'value of "both" will use both page breaks and lines '
'to mark chapters.')
),
OptionRecommendation(name='read_metadata_from_opf',
recommended_value=None, level=OptionRecommendation.LOW,
@ -130,6 +212,7 @@ OptionRecommendation(name='read_metadata_from_opf',
'file.')
),
OptionRecommendation(name='title',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('Set the title.')),
@ -237,6 +320,7 @@ OptionRecommendation(name='language',
rec = self.get_option_by_name(name)
if rec is not None and rec.level <= level:
rec.recommended_value = val
rec.level = level
def merge_ui_recommendations(self, recommendations):
'''
@ -248,6 +332,7 @@ OptionRecommendation(name='language',
rec = self.get_option_by_name(name)
if rec is not None and rec.level <= level and rec.level < rec.HIGH:
rec.recommended_value = val
rec.level = level
def read_user_metadata(self):
'''
@ -332,6 +417,9 @@ OptionRecommendation(name='language',
self.opts.source = self.opts.input_profile
self.opts.dest = self.opts.output_profile
from calibre.ebooks.oeb.transforms.structure import DetectStructure
DetectStructure()(self.oeb, self.opts)
from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
fbase = self.opts.base_font_size
if fbase == 0:
@ -364,6 +452,8 @@ OptionRecommendation(name='language',
trimmer = ManifestTrimmer()
trimmer(self.oeb, self.opts)
self.oeb.toc.rationalize_play_orders()
self.log.info('Creating %s...'%self.output_plugin.name)
self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
self.opts, self.log)
@ -384,4 +474,3 @@ def create_oebbook(log, path_or_stream, opts, reader=None):
reader()(oeb, path_or_stream)
return oeb

View File

@ -15,88 +15,15 @@ from calibre.ebooks import DRMError
from calibre.ebooks.epub import config as common_config
from calibre.ebooks.epub.from_html import convert as html2epub, find_html_index
from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf2 import OPFCreator, OPF
from calibre.utils.zipfile import ZipFile
from calibre.customize.ui import run_plugins_on_preprocess
def lit2opf(path, tdir, opts):
from calibre.ebooks.lit.reader import LitReader
print 'Exploding LIT file:', path
reader = LitReader(path)
reader.extract_content(tdir, False)
opf = None
for opf in walk(tdir):
if opf.lower().endswith('.opf'):
break
if not opf.endswith('.opf'):
opf = None
if opf is not None: # Check for url-quoted filenames
_opf = OPF(opf, os.path.dirname(opf))
replacements = []
for item in _opf.itermanifest():
href = item.get('href', '')
path = os.path.join(os.path.dirname(opf), *(href.split('/')))
if not os.path.exists(path) and os.path.exists(path.replace('&', '%26')):
npath = path
path = path.replace('&', '%26')
replacements.append((path, npath))
if replacements:
print 'Fixing quoted filenames...'
for path, npath in replacements:
if os.path.exists(path):
os.rename(path, npath)
for f in walk(tdir):
with open(f, 'r+b') as f:
raw = f.read()
for path, npath in replacements:
raw = raw.replace(os.path.basename(path), os.path.basename(npath))
f.seek(0)
f.truncate()
f.write(raw)
return opf
def mobi2opf(path, tdir, opts):
from calibre.ebooks.mobi.reader import MobiReader
print 'Exploding MOBI file:', path.encode('utf-8') if isinstance(path, unicode) else path
reader = MobiReader(path)
reader.extract_content(tdir)
files = list(walk(tdir))
opts.encoding = 'utf-8'
for f in files:
if f.lower().endswith('.opf'):
return f
html_pat = re.compile(r'\.(x){0,1}htm(l){0,1}', re.IGNORECASE)
hf = [f for f in files if html_pat.match(os.path.splitext(f)[1]) is not None]
mi = MetaInformation(os.path.splitext(os.path.basename(path))[0], [_('Unknown')])
opf = OPFCreator(tdir, mi)
opf.create_manifest([(hf[0], None)])
opf.create_spine([hf[0]])
ans = os.path.join(tdir, 'metadata.opf')
opf.render(open(ans, 'wb'))
return ans
def fb22opf(path, tdir, opts):
from calibre.ebooks.lrf.fb2.convert_from import to_html
print 'Converting FB2 to HTML...'
return to_html(path, tdir)
def rtf2opf(path, tdir, opts):
from calibre.ebooks.lrf.rtf.convert_from import generate_html
generate_html(path, tdir)
return os.path.join(tdir, 'metadata.opf')
def txt2opf(path, tdir, opts):
from calibre.ebooks.lrf.txt.convert_from import generate_html
generate_html(path, opts.encoding, tdir)
return os.path.join(tdir, 'metadata.opf')
def pdf2opf(path, tdir, opts):
from calibre.ebooks.lrf.pdf.convert_from import generate_html
generate_html(path, tdir)
opts.dont_split_on_page_breaks = True
return os.path.join(tdir, 'metadata.opf')
def epub2opf(path, tdir, opts):
zf = ZipFile(path)
zf.extractall(tdir)
@ -110,35 +37,23 @@ def epub2opf(path, tdir, opts):
if opf and os.path.exists(encfile):
if not process_encryption(encfile, opf):
raise DRMError(os.path.basename(path))
if opf is None:
raise ValueError('%s is not a valid EPUB file'%path)
return opf
def odt2epub(path, tdir, opts):
from calibre.ebooks.odt.to_oeb import Extract
opts.encoding = 'utf-8'
return Extract()(path, tdir)
MAP = {
'lit' : lit2opf,
'mobi' : mobi2opf,
'prc' : mobi2opf,
'azw' : mobi2opf,
'fb2' : fb22opf,
'rtf' : rtf2opf,
'txt' : txt2opf,
'pdf' : pdf2opf,
'epub' : epub2opf,
'odt' : odt2epub,
}
SOURCE_FORMATS = ['lit', 'mobi', 'prc', 'azw', 'fb2', 'odt', 'rtf',
SOURCE_FORMATS = ['lit', 'mobi', 'prc', 'azw', 'fb2', 'odt', 'rtf',
'txt', 'pdf', 'rar', 'zip', 'oebzip', 'htm', 'html', 'epub']
def unarchive(path, tdir):
extract(path, tdir)
files = list(walk(tdir))
for ext in ['opf'] + list(MAP.keys()):
for f in files:
if f.lower().endswith('.'+ext):
@ -147,32 +62,32 @@ def unarchive(path, tdir):
return f, ext
return find_html_index(files)
def any2epub(opts, path, notification=None, create_epub=True,
def any2epub(opts, path, notification=None, create_epub=True,
oeb_cover=False, extract_to=None):
path = run_plugins_on_preprocess(path)
ext = os.path.splitext(path)[1]
if not ext:
raise ValueError('Unknown file type: '+path)
ext = ext.lower()[1:]
if opts.output is None:
opts.output = os.path.splitext(os.path.basename(path))[0]+'.epub'
with nested(TemporaryDirectory('_any2epub1'), TemporaryDirectory('_any2epub2')) as (tdir1, tdir2):
if ext in ['rar', 'zip', 'oebzip']:
path, ext = unarchive(path, tdir1)
print 'Found %s file in archive'%(ext.upper())
if ext in MAP.keys():
path = MAP[ext](path, tdir2, opts)
ext = 'opf'
if re.match(r'((x){0,1}htm(l){0,1})|opf', ext) is None:
raise ValueError('Conversion from %s is not supported'%ext.upper())
print 'Creating EPUB file...'
html2epub(path, opts, notification=notification,
html2epub(path, opts, notification=notification,
create_epub=create_epub, oeb_cover=oeb_cover,
extract_to=extract_to)

View File

@ -0,0 +1,74 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Anatoly Shipitsin <norguhtar at gmail.com>'
"""
Convert .fb2 files to XHTML
"""
import os
from base64 import b64decode
from lxml import etree
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre import guess_type
FB2NS = 'http://www.gribuser.ru/xml/fictionbook/2.0'
class FB2Input(InputFormatPlugin):
    """Input plugin that converts a FictionBook (FB2) file to XHTML + OPF.

    The FB2 XML is run through the bundled ``fb2_xsl`` stylesheet to produce
    ``index.xhtml``; embedded ``<binary>`` payloads are written out as files
    and an OPF describing the result is generated. All output is written to
    the current working directory.
    """

    name        = 'FB2 Input'
    author      = 'Anatoly Shipitsin'
    description = 'Convert FB2 files to HTML'
    file_types  = set(['fb2'])

    # Map the h1/h2/h3 headings of the generated XHTML onto the three
    # Table of Contents levels.
    recommendations = set([
        ('level1_toc', '//h:h1', OptionRecommendation.MED),
        ('level2_toc', '//h:h2', OptionRecommendation.MED),
        ('level3_toc', '//h:h3', OptionRecommendation.MED),
    ])

    def convert(self, stream, options, file_ext, log,
                accelerators):
        """Convert the FB2 document in `stream`; return the path to the OPF.

        :param stream: Seekable file-like object containing the FB2 XML. It is
            parsed once for conversion and then rewound to read the metadata.
        :param options: Conversion options (not used by this plugin).
        :param file_ext: Extension of the input file (not used).
        :param log: Logger; only ``log.debug`` is called.
        :param accelerators: Not used by this plugin.
        :return: Absolute path of the generated ``metadata.opf``.
        """
        from calibre.resources import fb2_xsl
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.metadata.meta import get_metadata
        from calibre.ebooks.oeb.base import XLINK_NS
        NAMESPACES = {'f':FB2NS, 'l':XLINK_NS}

        log.debug('Parsing XML...')
        parser = etree.XMLParser(recover=True, no_network=True)
        doc = etree.parse(stream, parser)
        self.extract_embedded_content(doc)

        log.debug('Converting XML to HTML...')
        styledoc = etree.fromstring(fb2_xsl)
        transform = etree.XSLT(styledoc)
        result = transform(doc)
        # Use context managers so the handles are flushed and closed before
        # the directory is listed for the manifest below.
        with open('index.xhtml', 'wb') as html_file:
            html_file.write(transform.tostring(result))

        stream.seek(0)
        mi = get_metadata(stream, 'fb2')
        if not mi.title:
            mi.title = _('Unknown')
        if not mi.authors:
            mi.authors = [_('Unknown')]

        opf = OPFCreator(os.getcwdu(), mi)
        # Everything present in the working directory at this point (the
        # XHTML plus the extracted binaries) goes into the manifest.
        entries = [(name, guess_type(name)[0]) for name in os.listdir('.')]
        opf.create_manifest(entries)
        opf.create_spine(['index.xhtml'])

        for img in doc.xpath('//f:coverpage/f:image', namespaces=NAMESPACES):
            href = img.get('{%s}href'%XLINK_NS, img.get('href', None))
            if href is not None:
                # Cover references look like "#<binary id>"; the binary was
                # extracted to a file named after that id.
                if href.startswith('#'):
                    href = href[1:]
                opf.guide.set_cover(os.path.abspath(href))

        with open('metadata.opf', 'wb') as opf_file:
            opf.render(opf_file)
        return os.path.join(os.getcwd(), 'metadata.opf')

    def extract_embedded_content(self, doc):
        """Write every top-level ``<binary id="...">`` payload to a file.

        The ``id`` attribute is used as the file name (in the current working
        directory) and the element text is decoded as base64.

        :param doc: Parsed FB2 document (anything with an lxml style
            ``xpath`` method).
        """
        for elem in doc.xpath('./*'):
            if 'binary' in elem.tag and 'id' in elem.attrib:
                fname = elem.attrib['id']
                # The id comes from the (untrusted) book and becomes a file
                # name: refuse anything that is not a plain name so a crafted
                # book cannot write outside the working directory.
                if not fname or fname in ('.', '..') or \
                        os.path.basename(fname) != fname:
                    continue
                # An empty <binary/> has text None; treat it as no data.
                data = b64decode((elem.text or '').strip())
                with open(fname, 'wb') as out:
                    out.write(data)

View File

@ -1,125 +0,0 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Anatoly Shipitsin <norguhtar at gmail.com>'
"""
Convert .fb2 files to .lrf
"""
import os, sys, shutil, logging
from base64 import b64decode
from lxml import etree
from calibre.ebooks.lrf import option_parser as lrf_option_parser
from calibre.ebooks.metadata.meta import get_metadata
from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
from calibre import setup_cli_handlers
from calibre.resources import fb2_xsl
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.ebooks.metadata.opf import OPFCreator
from calibre.ebooks.metadata import MetaInformation
def option_parser():
parser = lrf_option_parser(
_('''%prog [options] mybook.fb2
%prog converts mybook.fb2 to mybook.lrf'''))
parser.add_option('--debug-html-generation', action='store_true', default=False,
dest='debug_html_generation', help=_('Print generated HTML to stdout and quit.'))
parser.add_option('--keep-intermediate-files', action='store_true', default=False,
help=_('Keep generated HTML files after completing conversion to LRF.'))
return parser
def extract_embedded_content(doc):
for elem in doc.xpath('./*'):
if 'binary' in elem.tag and elem.attrib.has_key('id'):
fname = elem.attrib['id']
data = b64decode(elem.text.strip())
open(fname, 'wb').write(data)
def to_html(fb2file, tdir):
fb2file = os.path.abspath(fb2file)
cwd = os.getcwd()
try:
os.chdir(tdir)
print 'Parsing XML...'
parser = etree.XMLParser(recover=True, no_network=True)
doc = etree.parse(fb2file, parser)
extract_embedded_content(doc)
print 'Converting XML to HTML...'
styledoc = etree.fromstring(fb2_xsl)
transform = etree.XSLT(styledoc)
result = transform(doc)
open('index.html', 'wb').write(transform.tostring(result))
try:
mi = get_metadata(open(fb2file, 'rb'), 'fb2')
except:
mi = MetaInformation(None, None)
if not mi.title:
mi.title = os.path.splitext(os.path.basename(fb2file))[0]
if not mi.authors:
mi.authors = [_('Unknown')]
opf = OPFCreator(tdir, mi)
opf.create_manifest([('index.html', None)])
opf.create_spine(['index.html'])
opf.render(open('metadata.opf', 'wb'))
return os.path.join(tdir, 'metadata.opf')
finally:
os.chdir(cwd)
def generate_html(fb2file, encoding, logger):
tdir = PersistentTemporaryDirectory('_fb22lrf')
to_html(fb2file, tdir)
return os.path.join(tdir, 'index.html')
def process_file(path, options, logger=None):
if logger is None:
level = logging.DEBUG if options.verbose else logging.INFO
logger = logging.getLogger('fb22lrf')
setup_cli_handlers(logger, level)
fb2 = os.path.abspath(os.path.expanduser(path))
f = open(fb2, 'rb')
mi = get_metadata(f, 'fb2')
f.close()
htmlfile = generate_html(fb2, options.encoding, logger)
tdir = os.path.dirname(htmlfile)
cwd = os.getcwdu()
try:
if not options.output:
ext = '.lrs' if options.lrs else '.lrf'
options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
options.output = os.path.abspath(os.path.expanduser(options.output))
if not mi.title:
mi.title = os.path.splitext(os.path.basename(fb2))[0]
if (not options.title or options.title == _('Unknown')):
options.title = mi.title
if (not options.author or options.author == _('Unknown')) and mi.authors:
options.author = mi.authors.pop()
if (not options.category or options.category == _('Unknown')) and mi.category:
options.category = mi.category
if (not options.freetext or options.freetext == _('Unknown')) and mi.comments:
options.freetext = mi.comments
os.chdir(tdir)
html_process_file(htmlfile, options, logger)
finally:
os.chdir(cwd)
if getattr(options, 'keep_intermediate_files', False):
logger.debug('Intermediate files in '+ tdir)
else:
shutil.rmtree(tdir)
def main(args=sys.argv, logger=None):
parser = option_parser()
options, args = parser.parse_args(args)
if len(args) != 2:
parser.print_help()
print
print 'No fb2 file specified'
return 1
process_file(args[1], options, logger)
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -41,10 +41,12 @@ NCX_NS = 'http://www.daisy.org/z3986/2005/ncx/'
SVG_NS = 'http://www.w3.org/2000/svg'
XLINK_NS = 'http://www.w3.org/1999/xlink'
CALIBRE_NS = 'http://calibre.kovidgoyal.net/2009/metadata'
RE_NS = 'http://exslt.org/regular-expressions'
XPNSMAP = {'h' : XHTML_NS, 'o1' : OPF1_NS, 'o2' : OPF2_NS,
'd09': DC09_NS, 'd10': DC10_NS, 'd11': DC11_NS,
'xsi': XSI_NS, 'dt' : DCTERMS_NS, 'ncx': NCX_NS,
'svg': SVG_NS, 'xl' : XLINK_NS}
'svg': SVG_NS, 'xl' : XLINK_NS, 're': RE_NS}
OPF1_NSMAP = {'dc': DC11_NS, 'oebpackage': OPF1_NS}
OPF2_NSMAP = {'opf': OPF2_NS, 'dc': DC11_NS, 'dcterms': DCTERMS_NS,
'xsi': XSI_NS, 'calibre': CALIBRE_NS}
@ -1256,16 +1258,21 @@ class TOC(object):
:attr:`klass`: Optional semantic class referenced by this node.
:attr:`id`: Option unique identifier for this node.
"""
def __init__(self, title=None, href=None, klass=None, id=None):
def __init__(self, title=None, href=None, klass=None, id=None,
play_order=None):
self.title = title
self.href = urlnormalize(href) if href else href
self.klass = klass
self.id = id
self.nodes = []
self.play_order = 0
if play_order is None:
play_order = self.next_play_order()
self.play_order = play_order
def add(self, title, href, klass=None, id=None):
def add(self, title, href, klass=None, id=None, play_order=0):
"""Create and return a new sub-node of this node."""
node = TOC(title, href, klass, id)
node = TOC(title, href, klass, id, play_order)
self.nodes.append(node)
return node
@ -1276,6 +1283,18 @@ class TOC(object):
for node in child.iter():
yield node
def count(self):
    """Return the number of nodes yielded by iter(), not counting one
    (iter() includes this node itself)."""
    total = 0
    for _node in self.iter():
        total += 1
    return total - 1
def next_play_order(self):
    """Return one more than the largest play_order among the nodes
    yielded by iter()."""
    highest = max(node.play_order for node in self.iter())
    return highest + 1
def has_href(self, href):
    """Return True if any node yielded by iter() has exactly this href."""
    return any(node.href == href for node in self.iter())
def iterdescendants(self):
"""Iterate over all descendant nodes in depth-first order."""
for child in self.nodes:
@ -1309,6 +1328,10 @@ class TOC(object):
except ValueError:
return 1
def __str__(self):
return 'TOC: %s --> %s'%(self.title, self.href)
def to_opf1(self, tour):
for node in self.nodes:
element(tour, 'site', attrib={
@ -1319,7 +1342,7 @@ class TOC(object):
def to_ncx(self, parent):
for node in self.nodes:
id = node.id or unicode(uuid.uuid4())
attrib = {'id': id, 'playOrder': '0'}
attrib = {'id': id, 'playOrder': str(node.play_order)}
if node.klass:
attrib['class'] = node.klass
point = element(parent, NCX('navPoint'), attrib=attrib)
@ -1329,6 +1352,34 @@ class TOC(object):
node.to_ncx(point)
return parent
def rationalize_play_orders(self):
    '''
    Ensure that all nodes with the same play_order have the same href and
    with different play_orders have different hrefs.

    Nodes are visited in iter() order and each one is only compared
    against the nodes that come before it.
    '''
    def first_earlier(node, attr):
        # First node yielded before `node` whose `attr` equals node's,
        # or None when there is no such node.
        wanted = getattr(node, attr)
        for other in self.iter():
            if other is node:
                return None
            if getattr(other, attr) == wanted:
                return other
        return None

    for node in self.iter():
        clash = first_earlier(node, 'play_order')
        if clash is not None and node.href != clash.href:
            # Same play_order but a different target: reuse the order of
            # an earlier node with this href, else take a fresh one.
            fresh = self.next_play_order()
            twin = first_earlier(node, 'href')
            if twin is None:
                node.play_order = fresh
            else:
                node.play_order = twin.play_order
        twin = first_earlier(node, 'href')
        if twin is not None:
            node.play_order = twin.play_order
class PageList(object):
"""Collection of named "pages" to mapped positions within an OEB data model

View File

@ -118,6 +118,7 @@ class EbookIterator(object):
print 'Loaded embedded font:', repr(family)
def __enter__(self):
self.delete_on_exit = []
self._tdir = TemporaryDirectory('_ebook_iter')
self.base = self._tdir.__enter__()
from calibre.ebooks.conversion.plumber import Plumber
@ -137,9 +138,11 @@ class EbookIterator(object):
cover = self.opf.cover
if self.ebook_ext in ('lit', 'mobi', 'prc', 'opf') and cover:
cfile = os.path.join(os.path.dirname(self.spine[0]), 'calibre_ei_cover.html')
cfile = os.path.join(os.path.dirname(self.spine[0]),
'calibre_iterator_cover.html')
open(cfile, 'wb').write(TITLEPAGE%cover)
self.spine[0:0] = [SpineItem(cfile)]
self.delete_on_exit.append(cfile)
if self.opf.path_to_html_toc is not None and \
self.opf.path_to_html_toc not in self.spine:
@ -221,3 +224,6 @@ class EbookIterator(object):
def __exit__(self, *args):
self._tdir.__exit__(*args)
for x in self.delete_on_exit:
if os.path.exists(x):
os.remove(x)

View File

@ -343,7 +343,8 @@ class OEBReader(object):
continue
id = child.get('id')
klass = child.get('class')
node = toc.add(title, href, id=id, klass=klass)
po = int(child.get('playOrder', self.oeb.toc.next_play_order()))
node = toc.add(title, href, id=id, klass=klass, play_order=po)
self._toc_from_navpoint(item, node, child)
def _toc_from_ncx(self, item):

View File

@ -15,12 +15,10 @@ from lxml.etree import XPath as _XPath
from lxml import etree
from lxml.cssselect import CSSSelector
from calibre.ebooks.oeb.base import OEB_STYLES, XPNSMAP, urldefrag, \
rewrite_links
from calibre.ebooks.oeb.base import OEB_STYLES, XPNSMAP as NAMESPACES, \
urldefrag, rewrite_links
from calibre.ebooks.epub import tostring, rules
NAMESPACES = dict(XPNSMAP)
NAMESPACES['re'] = 'http://exslt.org/regular-expressions'
XPath = functools.partial(_XPath, namespaces=NAMESPACES)

View File

@ -0,0 +1,151 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from lxml import etree
from urlparse import urlparse
from calibre.ebooks.oeb.base import XPNSMAP, TOC
XPath = lambda x: etree.XPath(x, namespaces=XPNSMAP)
class DetectStructure(object):
    '''
    Detect chapters in an OEB book, mark them, and (re)build the Table of
    Contents when the existing one was auto generated or the user asked for
    an automatic one.

    Instances are callable: ``DetectStructure()(oeb, opts)``.
    '''

    def __call__(self, oeb, opts):
        '''
        Run structure detection on `oeb` using the conversion options `opts`.

        The TOC is rebuilt in this order: level based XPath expressions,
        then detected chapters, then links found in the spine. If the result
        has fewer than two entries and the original TOC had more than two,
        the original TOC is restored.
        '''
        self.log = oeb.log
        self.oeb = oeb
        self.opts = opts
        self.log('Detecting structure...')

        self.detect_chapters()
        if self.oeb.auto_generated_toc or opts.use_auto_toc:
            orig_toc = self.oeb.toc
            self.oeb.toc = TOC()
            self.create_level_based_toc()
            if self.oeb.toc.count() < 1:
                if not opts.no_chapters_in_toc and self.detected_chapters:
                    self.create_toc_from_chapters()
                if self.oeb.toc.count() < opts.toc_threshold:
                    self.create_toc_from_links()
            if self.oeb.toc.count() < 2 and orig_toc.count() > 2:
                self.oeb.toc = orig_toc
            else:
                self.oeb.auto_generated_toc = True
                self.log('Auto generated TOC with %d entries.' %
                        self.oeb.toc.count())

    def detect_chapters(self):
        '''
        Collect ``(spine item, element)`` pairs matching the ``chapter``
        XPath option into ``self.detected_chapters`` and insert the marker
        selected by the ``chapter_mark`` option before each match.
        '''
        self.detected_chapters = []
        if self.opts.chapter:
            chapter_xpath = XPath(self.opts.chapter)
            for item in self.oeb.spine:
                for x in chapter_xpath(item.data):
                    self.detected_chapters.append((item, x))

        chapter_mark = self.opts.chapter_mark
        page_break_before = 'display: block; page-break-before: always'
        page_break_after = 'display: block; page-break-after: always'
        for item, elem in self.detected_chapters:
            text = u' '.join([t.strip() for t in elem.xpath('descendant::text()')])
            self.log('\tDetected chapter:', text[:50])
            if chapter_mark == 'none':
                continue
            elif chapter_mark == 'rule':
                mark = etree.Element('hr')
            elif chapter_mark == 'pagebreak':
                mark = etree.Element('div', style=page_break_after)
            else: # chapter_mark == 'both':
                mark = etree.Element('hr', style=page_break_before)
            elem.addprevious(mark)

    def create_level_based_toc(self):
        '''Build the TOC from the level1/2/3 XPath options, if level1 is set.'''
        if self.opts.level1_toc is None:
            return
        for item in self.oeb.spine:
            self.add_leveled_toc_items(item)

    def create_toc_from_chapters(self):
        '''Add one top level TOC entry per detected chapter.'''
        counter = self.oeb.toc.next_play_order()
        for item, elem in self.detected_chapters:
            text, href = self.elem_to_link(item, elem, counter)
            self.oeb.toc.add(text, href, play_order=counter)
            counter += 1

    def create_toc_from_links(self):
        '''
        Add a TOC entry for every local (schemeless or ``file:``) link in the
        spine whose target and text are not already present in the TOC.
        '''
        # The expression uses the h: prefix, so it must be compiled with the
        # namespace map; a bare elem.xpath() call has no prefix bindings.
        links = XPath('//h:a[@href]')
        for item in self.oeb.spine:
            for a in links(item.data):
                href = a.get('href')
                purl = urlparse(href)
                if not purl[0] or purl[0] == 'file':
                    href, frag = purl.path, purl.fragment
                    href = item.abshref(href)
                    if frag:
                        href = '#'.join((href, frag))
                    if not self.oeb.toc.has_href(href):
                        text = u' '.join([t.strip() for t in \
                                a.xpath('descendant::text()')])
                        text = text[:100].strip()
                        # NOTE(review): TOC.has_text is not among the TOC
                        # methods visible alongside has_href -- confirm it
                        # exists in oeb.base.
                        if not self.oeb.toc.has_text(text):
                            self.oeb.toc.add(text, href,
                                play_order=self.oeb.toc.next_play_order())

    def elem_to_link(self, item, elem, counter):
        '''
        Return ``(text, href)`` for `elem` inside spine `item`.

        The text is the element's descendant text truncated to 100
        characters. The element is given an id (``calibre_toc_<counter>``
        unless it already has one) so that the href can point at it.
        '''
        text = u' '.join([t.strip() for t in elem.xpath('descendant::text()')])
        text = text[:100].strip()
        elem_id = elem.get('id', 'calibre_toc_%d'%counter)
        elem.set('id', elem_id)
        href = '#'.join((item.href, elem_id))
        return text, href

    def add_leveled_toc_items(self, item):
        '''
        Add TOC entries for spine `item` from the level1/2/3 XPath options.

        Level two entries are nested under the closest preceding level one
        entry, level three entries under the closest preceding level two
        entry. Level three is only considered when level two is set.
        '''
        level1 = XPath(self.opts.level1_toc)(item.data)
        if not level1:
            return

        counter = 1
        added = {}
        for elem in level1:
            text, href = self.elem_to_link(item, elem, counter)
            counter += 1
            if text:
                added[elem] = self.oeb.toc.add(text, href,
                    play_order=self.oeb.toc.next_play_order())

        if self.opts.level2_toc is None:
            return
        added2, counter = self._add_sublevel(item, self.opts.level2_toc,
                added, counter)
        if self.opts.level3_toc is not None:
            self._add_sublevel(item, self.opts.level3_toc, added2, counter)

    def _add_sublevel(self, item, expr, parents, counter):
        '''
        Add every element of `item` matched by the XPath `expr` below the TOC
        node of the closest preceding element (document order) in `parents`,
        a mapping of element -> TOC node. Elements with no preceding parent
        are skipped.

        Returns ``(mapping of element -> new TOC node, updated counter)``.
        '''
        added = {}
        for elem in list(XPath(expr)(item.data)):
            parent = None
            # A distinct loop variable is essential here: `item` must remain
            # the spine item, since elem_to_link() reads item.href.
            for desc in item.data.iterdescendants():
                if desc in parents:
                    parent = parents[desc]
                elif desc == elem:
                    if parent is not None:
                        text, href = self.elem_to_link(item, elem, counter)
                        counter += 1
                        if text:
                            added[elem] = parent.add(text, href,
                                play_order=self.oeb.toc.next_play_order())
                    # Each element occurs once among the descendants.
                    break
        return added, counter

View File

@ -27,10 +27,6 @@ entry_points = {
'lrs2lrf = calibre.ebooks.lrf.lrs.convert_from:main',
'isbndb = calibre.ebooks.metadata.isbndb:main',
'librarything = calibre.ebooks.metadata.library_thing:main',
'comic2lrf = calibre.ebooks.lrf.comic.convert_from:main',
'comic2epub = calibre.ebooks.epub.from_comic:main',
'comic2mobi = calibre.ebooks.mobi.from_comic:main',
'comic2pdf = calibre.ebooks.pdf.from_comic:main',
'calibre-debug = calibre.debug:main',
'calibredb = calibre.library.cli:main',
'calibre-fontconfig = calibre.utils.fontconfig:main',
@ -151,8 +147,6 @@ def setup_completion(fatal_errors):
from calibre.ebooks.lrf.pdf.reflow import option_parser as pdfhtmlop
from calibre.web.feeds.main import option_parser as feeds2disk
from calibre.web.feeds.recipes import titles as feed_titles
from calibre.ebooks.lrf.comic.convert_from import option_parser as comicop
from calibre.ebooks.epub.from_comic import option_parser as comic2epub
from calibre.ebooks.metadata.fetch import option_parser as fem_op
from calibre.gui2.main import option_parser as guiop
from calibre.utils.smtp import option_parser as smtp_op
@ -181,10 +175,6 @@ def setup_completion(fatal_errors):
f.write(opts_and_exts('ebook-meta', metaop, list(meta_filetypes())))
f.write(opts_and_exts('lrfviewer', lrfviewerop, ['lrf']))
f.write(opts_and_exts('pdfrelow', pdfhtmlop, ['pdf']))
f.write(opts_and_exts('comic2lrf', comicop, ['cbz', 'cbr']))
f.write(opts_and_exts('comic2epub', comic2epub, ['cbz', 'cbr']))
f.write(opts_and_exts('comic2mobi', comic2epub, ['cbz', 'cbr']))
f.write(opts_and_exts('comic2pdf', comic2epub, ['cbz', 'cbr']))
f.write(opts_and_words('feeds2disk', feeds2disk, feed_titles))
f.write(opts_and_words('fetch-ebook-metadata', fem_op, []))
f.write(opts_and_words('calibre-smtp', smtp_op, []))

View File

@ -139,7 +139,7 @@ class resources(OptionlessCommand):
RESOURCES = dict(
opf_template = 'ebooks/metadata/opf.xml',
ncx_template = 'ebooks/metadata/ncx.xml',
fb2_xsl = 'ebooks/lrf/fb2/fb2.xsl',
fb2_xsl = 'ebooks/fb2/fb2.xsl',
metadata_sqlite = 'library/metadata_sqlite.sql',
jquery = 'gui2/viewer/jquery.js',
jquery_scrollTo = 'gui2/viewer/jquery_scrollTo.js',